forked from rdpeng/RepData_PeerAssessment1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PA1_template.Rmd
57 lines (48 loc) · 1.84 KB
/
PA1_template.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Reproducible Research: Course project 1
## Loading and preprocessing the data
```{r}
# Extract the archive; unzip() returns the path(s) of the extracted file(s),
# which are then read as CSV.
activity_file <- unzip("activity.zip")
activity_data <- read.csv(activity_file)

# activity_data keeps every row (missing values included);
# activity_data_na keeps only the rows without any missing value.
activity_data_na <- activity_data[complete.cases(activity_data), , drop = FALSE]
```
## Mean and median of the total number of steps taken per day
```{r}
# Total number of steps per day, computed from the complete rows only.
# aggregate() names the grouping column Group.1 and the summed column x.
tsed <- aggregate(activity_data_na$steps,
                  by = list(activity_data_na$date), FUN = sum)
plot(tsed$x, type = "h", lwd = 10, lend = "square", xlab = "Days",
     ylab = "Number of steps")

# Mean and median of the daily totals. Taking them over
# activity_data_na$steps would summarise single intervals, not days.
mean(tsed$x)
median(tsed$x)
```
## Average daily activity pattern
```{r}
# Average number of steps for each interval identifier across all days.
# Group.1 holds the interval identifier, x the averaged step count.
asei <- aggregate(activity_data_na$steps,
                  by = list(activity_data_na$interval), FUN = mean)

# Plot against the interval identifier itself so the x axis matches its
# "Interval" label (plotting asei$x alone would use the row index).
plot(asei$Group.1, asei$x, type = "l", xlab = "Interval",
     ylab = "Number of steps")

# Interval with the highest average step count, followed by that average.
asei$Group.1[which.max(asei$x)]
max(asei$x)
```
## Imputing missing values
```{r}
# Flag and count the rows whose step count is missing.
missing <- is.na(activity_data$steps)
sum(missing)

# Replace every missing step count with the mean of the observed ones.
activity_data$steps[missing] <- mean(activity_data$steps, na.rm = TRUE)

# Total number of steps per day after imputation.
tsed <- aggregate(activity_data$steps,
                  by = list(activity_data$date), FUN = sum)
plot(tsed$x, type = "h", lwd = 10, lend = "square", xlab = "Days",
     ylab = "Number of steps")

# Mean and median of the daily totals (not of the per-interval counts).
mean(tsed$x)
median(tsed$x)
```
## Activity patterns between weekdays and weekends
```{r}
# Flag weekend rows. weekdays() returns day names in the current locale, so
# comparing against English names only works in an English locale; the wday
# component of POSIXlt is numeric (0 = Sunday, 6 = Saturday) instead.
activity_data$weekend <-
  as.POSIXlt(as.Date(activity_data$date))$wday %in% c(0, 6)

weekday_data <- activity_data[!activity_data$weekend, ]
weekend_data <- activity_data[activity_data$weekend, ]

# Average steps per interval, drawn separately for weekdays and weekends.
# aggregate() returns two columns (interval, mean steps), which plot() uses
# as x and y.
plot(
  aggregate(weekday_data$steps, by = list(weekday_data$interval), FUN = mean),
  type = "l", xlab = "Interval", ylab = "Number of steps", main = "Weekday"
)
plot(
  aggregate(weekend_data$steps, by = list(weekend_data$interval), FUN = mean),
  type = "l", xlab = "Interval", ylab = "Number of steps", main = "Weekend"
)
```