Skip to content

Latest commit

 

History

History
118 lines (85 loc) · 4.05 KB

PA1_template.md

File metadata and controls

118 lines (85 loc) · 4.05 KB
title output
Assignment: Activity monitoring
html_document

Loading and preprocessing the data

# Load the raw activity records: extract the CSV from the bundled archive
# into the working directory, then parse it into a data frame.
#setwd("./RepData_PeerAssessment1/")
unzip(zipfile = "activity.zip")
ActData <- read.csv(file = "activity.csv", header = TRUE, sep = ",")

Calculating the mean total number of steps taken per day

# Sum the step counts within each date. The result has two columns:
# Group.1 (the date) and x (that date's total). Because na.rm = TRUE is
# forwarded to sum(), a date whose readings are all NA gets a total of 0
# rather than NA.
TotalNumOfStepsPerDay <- aggregate(
  x = ActData$steps,
  by = list(ActData$date),
  FUN = sum,
  na.rm = TRUE
)
# Histogram of the daily totals, requesting one break per date.
hist(
  TotalNumOfStepsPerDay$x,
  breaks = nrow(TotalNumOfStepsPerDay),
  main = "Total Number of Steps Taken Per Day",
  xlab = "Total Number of Steps Taken Per Day"
)

plot of chunk unnamed-chunk-2

# Mean and median of the per-day step totals, echoed to the console.
MeanStep <- with(TotalNumOfStepsPerDay, mean(x, na.rm = TRUE))
MedianStep <- with(TotalNumOfStepsPerDay, median(x, na.rm = TRUE))
cat("Mean Number of Steps Taken Per Day: ", MeanStep, sep = " ")
## Mean Number of Steps Taken Per Day:  9354.23
cat("Median Number of Steps Taken Per Day: ", MedianStep, sep = " ")
## Median Number of Steps Taken Per Day:  10395

Outputting the average daily activity pattern

# Average the step counts for each 5-minute interval across all dates,
# dropping NA readings. Columns: Group.1 (interval id) and x (mean steps).
AverageNumOfStepsPerInt <- aggregate(
  x = ActData$steps,
  by = list(ActData$interval),
  FUN = mean,
  na.rm = TRUE
)
# Line plot of the mean step count against the interval id.
plot(
  x = AverageNumOfStepsPerInt$Group.1,
  y = AverageNumOfStepsPerInt$x,
  type = "l",
  main = "Average Number of Steps Taken Per 5-minute Interval In a Day",
  xlab = "Interval",
  ylab = "Average Number of Steps",
  ylim = c(0, 250)
)

plot of chunk unnamed-chunk-3

# Pick the interval id whose across-day average step count is the largest
# (which.max returns the first such row if there are ties).
MaxNumOfStepsInt <- with(AverageNumOfStepsPerInt, Group.1[which.max(x)])
cat("The Interval with Maximal Number of Steps Per 5-minute Interval In a Day: ", MaxNumOfStepsInt, sep = " ")
## The Interval with Maximal Number of Steps Per 5-minute Interval In a Day:  835

Imputing missing values
Note: NewActData is the new data set where missing data is replaced with the mean for the 5-minute interval
Report: The mean and median differ from those in the first part of this assignment. Imputing the missing data increases both the mean and the median of the total number of steps taken per day.

# Impute missing step counts with the across-day mean of the same 5-minute
# interval, then rebuild the per-day totals and redraw their histogram.
# Number of rows whose step count is missing.
TotalNumOfMissingVal <- sum(is.na(ActData$steps))
NewActData <- ActData
# Look up every row's interval in the per-interval averages and use that
# mean wherever the step count is NA. Matching on the interval id (instead
# of recycling the averages a fixed 8 times into column 1) keeps the
# imputation correct for any number or layout of missing readings and does
# not depend on the column order of the data frame.
NewActData$steps <- ifelse(
  is.na(NewActData$steps),
  AverageNumOfStepsPerInt$x[
    match(NewActData$interval, AverageNumOfStepsPerInt$Group.1)
  ],
  NewActData$steps
)
# Per-day totals of the imputed data: Group.1 is the date, x the total.
NewTotalNumOfStepsPerDay <- aggregate(
  x = NewActData$steps,
  by = list(NewActData$date),
  FUN = sum,
  na.rm = TRUE
)
# Histogram of the imputed daily totals, requesting one break per date.
hist(
  NewTotalNumOfStepsPerDay$x,
  breaks = length(NewTotalNumOfStepsPerDay$Group.1),
  main = "Total Number of Steps Taken Per Day",
  xlab = "Total Number of Steps Taken Per Day"
)

plot of chunk unnamed-chunk-4

# Mean and median of the per-day totals after imputation, echoed to the
# console.
NewMeanStep <- with(NewTotalNumOfStepsPerDay, mean(x, na.rm = TRUE))
NewMedianStep <- with(NewTotalNumOfStepsPerDay, median(x, na.rm = TRUE))
cat("Mean Number of Steps Taken Per Day after Imputing: ", NewMeanStep, sep = " ")
## Mean Number of Steps Taken Per Day after Imputing:  10766.19
cat("Median Number of Steps Taken Per Day after Imputing: ", NewMedianStep, sep = " ")
## Median Number of Steps Taken Per Day after Imputing:  10766.19

Activity pattern between weekdays and weekends
Note: DaysData is the new factor used to indicate "weekdays" and "weekends"

# Compare the average per-interval activity on weekdays against weekends,
# using the imputed data set.
# Classify each date by its numeric day of the week (POSIXlt wday:
# 0 = Sunday, 6 = Saturday). weekdays() returns day names in the current
# locale, so matching them against English literals silently fails on
# non-English systems; the numeric code does not depend on the locale.
DaysData <- factor(
  ifelse(
    as.POSIXlt(as.Date(NewActData$date))$wday %in% c(0L, 6L),
    "Weekends",
    "Weekdays"
  ),
  levels = c("Weekdays", "Weekends")
)
WeekActData <- cbind(NewActData, DaysData)
# Split the rows by day type.
NonWkdData <- WeekActData[WeekActData$DaysData == "Weekdays", ]
WkdData <- WeekActData[WeekActData$DaysData == "Weekends", ]
# Mean steps per 5-minute interval within each day type
# (columns: Group.1 = interval id, x = mean steps).
NonWkdAverageNumOfStepsPerInt <- aggregate(
  x = NonWkdData$steps,
  by = list(NonWkdData$interval),
  FUN = mean,
  na.rm = TRUE
)
WkdAverageNumOfStepsPerInt <- aggregate(
  x = WkdData$steps,
  by = list(WkdData$interval),
  FUN = mean,
  na.rm = TRUE
)
# Two side-by-side panels sharing the same y-range, with room reserved at
# the top for a common title. The previous graphics settings are saved so
# they can be restored once the figure is drawn.
OldPar <- par(mfrow = c(1, 2), oma = c(0, 0, 2, 0))
plot(
  NonWkdAverageNumOfStepsPerInt,
  type = "l",
  main = "Weekdays",
  xlab = "Interval",
  ylab = "Average Number of Steps",
  ylim = c(0, 250)
)
plot(
  WkdAverageNumOfStepsPerInt,
  type = "l",
  main = "Weekends",
  xlab = "Interval",
  ylab = "Average Number of Steps",
  ylim = c(0, 250)
)
# The outer title names both day types because it spans both panels.
title(
  main = "Average Number of Steps Taken Per 5-minute Interval: Weekdays vs. Weekends",
  outer = TRUE
)
par(OldPar)

plot of chunk unnamed-chunk-5