Lessons-Rscripts/class_2.Rmd

---
title: "in class coding"
author: "Julia Gallucci"
date: "12/12/2023"
output: html_document
---

Types in R
```{r}
# typeof() reports the internal storage type of an R object.
typeof("welcome") # "character"

typeof(FALSE) # "logical"

typeof(3.14) # "double"

typeof(10L) # "integer": the L suffix requests an integer

typeof(10) # "double": a number written without the L suffix

typeof(10+3i) # "complex"
```

Logical Vectors
```{r}
# A logical vector written out literally
logical_vector <- c(TRUE, TRUE, FALSE)
typeof(logical_vector)

# A logical vector produced by a comparison: which of 1..5 are even?
compare_vector <- (1:5 %% 2) == 0
typeof(compare_vector)
compare_vector
```

Numeric Vectors
```{r}
# Doubles: numbers written without the L suffix
double_vector <- c(3.1, -73, 2700)
typeof(double_vector)
length(double_vector)

# Integers: every element carries the L suffix
integer_vector <- c(3L, -73L, 2700L)
typeof(integer_vector)
length(integer_vector)

# Dividing by zero produces the special values of numeric vectors
special_values <- c(-1, 0, 1, NA) / 0
special_values

# Predicates that tell the special values apart, element by element
is.finite(special_values)

is.infinite(special_values)

is.na(special_values)

is.nan(special_values)
```

Character Vectors
```{r}
# A character vector holds strings; "2,000" is quoted, so it is text rather
# than a number.
character_vector <- c("hello", "world", "2,000")
character_vector

# Number of elements and storage type of the vector
length(character_vector)
typeof(character_vector)
```

Explicit Coercion
```{r}
# Start from digits stored as text
character_vector <- c("1", "0", "1")
typeof(character_vector)

# character -> numeric
numeric_vector <- as.numeric(character_vector)
typeof(numeric_vector)

# The target type can also be requested explicitly
as.double(character_vector)
as.integer(character_vector)

# numeric -> logical
logical_vector <- as.logical(numeric_vector)
logical_vector
```

Implicit coercion
```{r}
numeric_vector <- 1:10
numeric_vector

# Which elements are greater than 4? The comparison returns a logical vector.

logical_vector <- numeric_vector > 4
typeof(logical_vector)

# In arithmetic, TRUE is coerced to 1 and FALSE to 0, so sum() counts the TRUE
# values and mean() gives the proportion of TRUE values.

sum(logical_vector)
mean(logical_vector)
```

Mixed-type vectors
```{r}
# c() coerces all of its elements to one common type.

# logical + integer
mix_vector <- c(TRUE, FALSE, 10L)
typeof(mix_vector)

# integer + double
mix_vector <- c(1L, 4L, 1.5)
typeof(mix_vector)

# double + character
mix_vector <- c(1.5, -3.5, "a")
typeof(mix_vector)
```

Using tidyverse
```{r}
# Load the tidyverse, then check vector types with its is_*() predicates.
# NOTE(review): these is_*() helpers are presumably provided by rlang/purrr;
# confirm they are still made available by library(tidyverse) in the installed
# version.
library(tidyverse)
is_logical(c(TRUE,FALSE))
is_double(c(1.2,1.5))
is_integer(c(5L,10L))
is_character(c("cat","dog"))
```

Vector recycling
```{r}
# Two vectors of different lengths (5 and 10)
1:5
1:10

# The shorter vector is recycled to the length of the longer one. Because 5
# divides 10 evenly, the recycling happens silently.
1:5 + 1:10

```

Named vectors
```{r}
# Each element gets a name through the name = value form inside c()
name_vectors <- c(a = 100, b = 90, c = 80, d = 70, e = 60)

name_vectors

# Subsetting by position: a single index, repeated indices, excluded indices
name_vectors[3]
name_vectors[c(3, 3, 4)]
name_vectors[-c(1, 2, 5)]

# Subsetting with a logical mask, written out or computed
name_vectors[c(TRUE, TRUE, FALSE, TRUE, FALSE)]
name_vectors[(name_vectors %% 20) == 0]

# Subsetting by name
name_vectors[c("a", "c")]
```

List
```{r}
# A list can hold elements of different types
mylist <- list(7, "abc", FALSE)
mylist

str(mylist) # structure of our list

# A named list; element c is itself a list
mylist <- list(
  a = 1:4,
  b = "zyx",
  c = list(-1, -5)
)
mylist

# Three ways of extracting the second element
mylist[[2]]
mylist[["b"]]
mylist$b
```

Tibble
```{r}
# Build a tibble column by column; z is computed from the x and y columns
# defined just before it.
mytibble <- tibble(
  x = 1:5,
  y = 1,
  z = x^2 + y
)
mytibble
```
```{r}
# Load the built-in iris dataset and look at its first rows
data("iris")
head(iris)

# Convert the data frame to a tibble and print it
iris_tibble <- as_tibble(iris)
iris_tibble
```

Subsetting a data frame
```{r}
# Extract the Species column as a vector, with $ or with [[ ]]
iris$Species
iris[["Species"]]

# The same two extractions inside a pipe; . stands for the piped data frame
iris %>% .$Species

iris %>% .[["Species"]]
```

Strings
```{r}
library(stringr)

# Strings can be written with double or single quotes
"this"
'this'

# Length, concatenation and substring extraction
str_length("This is a string")
str_c("This is a string", "6")
str_sub("This is a string", start = 7, end = 12)

# Changing case
str_to_lower("UPPER CASE")
str_to_upper("lower case")
str_to_title("no capitalization")

mystring <- c(
  "apple", "banana",
  "clementine", "dragonfruit"
)
# Literal match
str_view(mystring, pattern = "an")

# . matches any single character
str_view(mystring, pattern = ".a.")

# ^ anchors the match to the start of the string, $ to the end
str_view(mystring, pattern = "^a")
str_view(mystring, pattern = "a$")
# Negated character class: any character that is not a, e or u. Members of a
# class are listed without separators; a comma inside [] would itself become
# a member of the class.
str_view(mystring, pattern = "[^aeu]")

# * means "zero or more of the preceding character"
mystring <- "abcccdeee"
str_view(mystring, pattern = "cc*")

# (..) captures two characters and \\1 requires the same two again
mystring <- c("abab", "cdcd", "efgh")
str_view(mystring, pattern = "(..)\\1", match = TRUE)

mystring <- c("banana", "dodo", "apple")

?str_detect
# TRUE/FALSE per string: does the pattern match?
str_detect(mystring, pattern = "(..)\\1")
# Keep only the strings that match
str_subset(mystring, pattern = "(..)\\1")

# Number of matches in each string (a count, not a logical)
str_count(mystring, pattern = "(..)\\1")
```


Factors
```{r}
library(forcats)

# Month abbreviations in arbitrary order
months <- c("Dec", "Apr", "Jan", "Mar")
months

# Levels in calendar order. Base R's month.abb constant is exactly
# "Jan", "Feb", ..., "Dec".
month_levels <- month.abb

# A factor with explicit levels keeps calendar order, not alphabetical order
month_fix <- factor(months, levels = month_levels)
month_fix

# fct_recode(data, "New name" = "Old name")
fct_recode(month_fix, "December" = "Dec")
```

Dates
```{r}
# Attach lubridate before its first use: today() and now() are lubridate
# functions, so calling them first only works if another package has already
# attached lubridate.
library(lubridate)

today()
now()

#as_datetime(<POSIXct item>)

# Parse dates from strings or numbers; the function name gives the order of
# the year, month and day components in the input.
ymd("2017-01-31")
ymd(20170131)
mdy("January 31st, 2017")
dmy("31-Jan-2017")

# Convert between dates and date-times
today()
as_datetime(today())
now()
as_date(now())

datetime <- ymd_hms("2016-07-08 12:34:56")

# Pull the individual components out of a date-time
year(datetime)
month(datetime)
mday(datetime)
yday(datetime)
wday(datetime)
hour(datetime)
minute(datetime)
second(datetime)
```

Time spans
```{r}
# Difference between two dates, then the same span expressed as a duration
today() - ymd(20000101)
as.duration(today() - ymd(20000101))

# Duration constructors (d prefix)
dseconds(120)
dminutes(60)
dhours(c(12, 24))
ddays(0:7)
dweeks(4)
dyears(10)

# time periods
# Adding months or years with + returns NA when the resulting day does not
# exist (e.g. Jan 31 + 1 month, Feb 29 + 1 year). %m+% rolls back to the last
# valid day of the month instead.
today() %m+% years(1)
today() %m+% months(1)
today() + days(1)
today() + hours(1)
today() + minutes(1)
today() + seconds(1)

# time zones
# The same clock time parsed in three different time zones
ymd_hms("2021-01-01 12:00:00", tz = "America/New_York")

ymd_hms("2021-01-01 12:00:00", tz = "Europe/Copenhagen")

ymd_hms("2021-01-01 12:00:00", tz = "Pacific/Auckland")
```

Missing data
```{r}
# NA propagates: any comparison or arithmetic involving NA yields NA
NA > 5
NA == 10 
NA + 5
# Even NA == NA is NA, never TRUE, so == cannot be used to find missing values
NA == NA 

# How we detect missing values
is.na(NA)
```

Exercises
1. Make a tibble where the vectors do not have equal length. What happens?

```{r, error = TRUE}
# A has length 10 and B has length 2, so tibble() raises an error.
# error = TRUE lets the document keep knitting and shows the error message in
# the output. The result is not assigned or printed, because the call never
# returns a tibble.
tibble(
  A = 1:10,
  B = 1:2
)
# Tibbles always have to have vectors (columns) of equal lengths
```

2. In the following tibble, extract variable B:
```{r}
mytibble <- tibble(A = 1:10, B = A * 2)
mytibble

# Extract B using the column name "B"
mytibble[["B"]]
mytibble$B
mytibble %>% .$B

# Extract B using the column index 2
mytibble[[2]]


# The same approaches applied to column A
mytibble$A
mytibble[[1]]
mytibble[["A"]]


mytibble
# data[row, column]
mytibble[2, 2]
mytibble[2, ]
mytibble[, 2]
mytibble[1:4, ]
mytibble[-(1:4), ]
```

3. Try using functions paste() and paste0(). Compare them to str_c(), how do they work differently?
```{r}
# Open the help pages for the three functions
?paste
?paste0
?str_c

# paste() with an explicit single-space separator
paste("Hi","my", "name","is","julia", sep = " ")

# paste0() joins the pieces with no separator
paste0("Hi","my", "name","is","julia")
# Similarly, paste() with an empty separator
paste("Hi","my","name","is","julia", sep = "")

str_c("Hi","my", "name","is","julia")
# Default separator is nothing, same as paste0
```

4. Look up the function str_trim() and demonstrate application.
```{r}
# Michael's example:
# pad the string on both sides, then remove the white space again
teststring <- "the string"
newstring <- str_pad(teststring, width = 50, side = "both")
newstring
str_trim(newstring)


# Julia's example:
str_trim("Hi my name is Julia       ")

# FM's example:
str_trim("  Hello world ")
```

5. Given the text `"Hello, world! \\."` (an R string that contains one literal backslash), match the backslash `\` with a regular expression.
```{r}
# In an R string literal "\\" is one backslash, so text contains a single
# literal backslash before the final dot.
text <- "Hello, world! \\."
# Henry's answer:
# "\\\\" is the two-character regex \\ which matches one literal backslash.
str_view(text, pattern = "\\\\")

```

6. Given the words "x-ray", "cat", and "excellent", match the words that start with x using a regular expression.
```{r}
text <- c("x-ray", "cat", "excellent")
# ^ anchors the match to the start of the string, so "excellent" (x in second
# position) does not match.
str_view(text,pattern = "^x")
```
7. Given the words "cat", "hat", "dog", and "rat", match the words that contain the pattern `at` using a regular expression.
```{r}
text <- c("cat", "hat", "dog", "rat")
# ".at" requires some character before "at" and includes it in the match
str_view(text, pattern = ".at")
# "at" matches the two letters wherever they appear
str_view(text, pattern = "at")
```
8. Given the built-in vector LETTERS (A-Z), match only the vowels with a regular expression.
```{r}
# Built-in constants: the upper-case and lower-case alphabets
LETTERS
letters

# Members of a character class are listed without separators; a comma inside
# [] would be treated as one more character to match.
str_view(LETTERS, pattern = "[AEIOU]") # vowels
str_view(LETTERS, pattern = "[^AEIOU]") # everything except vowels
```
9. What does ^.*$ match?
```{r}
# ^ is the start of the string, .* is any run of characters (possibly empty)
# and $ is the end of the string.
str_view(1:10, "^.*$")

text <- "Hello world"

str_view(text, "^.*$")
# The pattern matches the whole string, whatever it contains.
```
10. What happens if we parse a string that contains an invalid date?
```{r}
# February 30 does not exist.
# NOTE(review): ymd() is expected to return NA with a parsing warning rather
# than an error; run the chunk to confirm.
ymd("2023-02-30")
```