Created by Safe
We can create a variable by using <-
It means: assign the value (right) to the variable (left)
# create variable
x <- 1
y <- 2
x+y
print(x + y)
z <- x+y
z
There are 3 types: Numeric, Logical, Character
varA <- 100 # Numeric, e.g. 1, 1.0
varB <- TRUE # Logical: TRUE, FALSE
varC <- "Hello" # Character
sample <- c("M","F","M","F","F")
gender <- factor(sample)
summary(gender)
vec1 <- c() # Vector
list <- list() # List
world <- data.frame() # Data Frame
help(name) # Help Document
length(object) # number of elements or components
str(object) # structure of an object
class(object) # class or type of an object
summary(object) # result summaries
mean(), median(), min(), max(), quantile(), range(), sd(), var(), IQR()
mean(nums,na.rm = TRUE)
Use na.rm = TRUE to handle NA ('Not Available' / missing) values
# Finding mode
library(DescTools)
Mode()
data()
read.csv("filename")
# readr package
library(readr) # A fast and friendly way to read rectangular data
read_csv("filename")
write_csv(data,file = "filename")
View(obj) # Invoke a Data Viewer
head() # Show the first 6 observations (rows)
str() # Structure of object
colnames() # Column Name
rownames() # Row Name
colMeans() # Mean each column
rowMeans() # Mean each row
colSums() # Sum each column
rowSums() # Sum each row
table() # Cross Tabulation and Table Creation
mtcars[1,] # Select the first row with all columns
mtcars[,1] # Select all rows with only the first column
install.packages(name) # Install Package
library(name) # Loading/Attaching and Listing of Packages
help(package=name) # Read description
dplyr: data manipulation
tidyr: helps you create tidy data
readr: a fast and friendly way to read rectangular data
stringr: character manipulation
assertive: readable check functions to ensure code integrity
lubridate: handles datetime formats
library(dplyr)
cars <- as_tibble(mtcars) # enhanced version of data.frames
glimpse(cars) # print the data, similar to the str() function
cars %>% head() # Using the pipe operator in R
select() # picks variables based on their names.
filter() # picks cases based on their values.
arrange() # changes the ordering of the rows.
group_by() # takes a data frame and one or more variables to group by
summarise() # reduces multiple values down to a single summary.
mutate() # adds new variables that are functions of existing variables
transmute() # adds new variables and drops existing ones.
rename() # renaming columns
count() # Count observations by group
For example
starwars %>% select(name,height)
starwars %>% filter(sex == "male",skin_color == "light")
starwars %>% arrange(height)
starwars %>% summarise(height = mean(height, na.rm = TRUE))
starwars %>% group_by(sex) %>% select(height) %>% summarise(avg = mean(height, na.rm = TRUE))
starwars %>% rename(hair=hair_color)
starwars %>% mutate(height_m = height / 100)
starwars %>% transmute(height_m = height / 100)
library(assertive) # Readable check functions to ensure code integrity
- Logical checking: returns TRUE or FALSE
- assertive checking: errors when FALSE (using assertive package)
# Logical checking
is.character()
is.numeric()
is.logical()
is.factor()
is.Date()
is.na()
# assertive
assert_is_character()
assert_is_numeric()
...
as.character()
as.factor()
as.numeric()
library(stringr) # Character manipulation
str_trim(string) # Trim whitespace from a string
str_remove(string, pattern) # Remove matched patterns in a string
str_split(string, pattern) # Split up a string into pieces
str_sub(string, start = 1L, end = -1L) # Extract and replace substrings from a character vector
duplicated() # Returns a logical vector: TRUE for each element that duplicates an earlier one.
distinct() # Select only unique/distinct rows from a data frame.
Using with dplyr package
products %>% duplicated() %>% sum()
products %>% distinct()
na.omit()
replace(col, condition, replacement) # Replace the values where the condition holds
assert_all_are_in_closed_range(col, lower = l1, upper = u1) # Check in range of [l1,u1]