-
Notifications
You must be signed in to change notification settings - Fork 0
/
PolarRead.R
174 lines (118 loc) · 6 KB
/
PolarRead.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# reads csv and tcx files, does some merging, cleaning and saving
# THE BASIC FUNCTION
# 2015_11_01
# 2016_02_20 no longer dumping info on cadence + changing output csv's naming
#' Read one Polar export (csv + tcx), merge both sources and save the result
#'
#' For the export base name `x` the function reads `<path_in><x>.csv` and
#' `<path_in><x>.tcx` and writes two files into `path_out`:
#' * `<stamp>_<sport>_Info.csv` - the one-line summary stored at the top of
#'   the Polar csv, with a combined `when` timestamp and `duration.s` added;
#' * `<stamp>_<sport>_data.csv` - the tcx trackpoints column-bound to the
#'   per-sample rows of the csv.
#'
#' Columns of the merged file: from the tcx `when`, `lat`, `long`,
#' `altitude`, `distance`, `HR`, `cadence` (whichever are present in the
#' file); from the csv `time`, `speed.kmh`, `pace.minkm`, `stride.length`,
#' `temperature`, `power`, `time.s`, `pace`.
#'
#' @param x Base name of the export without extension, e.g.
#'   `"hana_Kysela_2016-08-19_14-08-37"`.
#' @param path_in Directory holding `<x>.csv` and `<x>.tcx`. It is pasted
#'   directly in front of `x`, so it must end with a path separator.
#' @param path_out Directory the two output files are written to. Also
#'   pasted directly in front of the file name, so it must end with a path
#'   separator.
#' @return `NULL`, invisibly. Called for its side effects: progress messages
#'   printed with `cat()` and the two csv files.
PolarRead <- function(x,
                      path_in = "C:/Users/hanak/Dropbox/Polar tcx/",
                      path_out = "C:/Users/hanak/Dropbox/Polar tcx/Polar_R_dataframes+infos/") {
  # Bootstrap dependencies. requireNamespace() only checks availability;
  # p_load() installs (if needed) and attaches XML, lubridate and plyr.
  if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
  pacman::p_load(XML, lubridate, plyr)

  cat("\n", "Working on", x, "\n")
  csv_file <- paste0(path_in, x, ".csv")
  tcx_file <- paste0(path_in, x, ".tcx")

  # Fail before any output is written if one of the two inputs is missing.
  inputs <- c(csv_file, tcx_file)
  absent <- inputs[!file.exists(inputs)]
  if (length(absent) > 0) {
    stop("Missing input file(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  #### CSV HEADER -> "myinfo" (one-line summary of the training) ####
  cat("..........Preparing data for the Infotable", "\n")
  myinfo <- read.csv(csv_file, header = TRUE, nrows = 1)
  # drop the name and "X" column information
  myinfo$Name <- NULL
  myinfo$X <- NULL
  names(myinfo) <- c(
    "sport", "date", "start", "duration", "total.dist", "average.HR",
    "average.speed", "max.speed", "average.pace", "max.pace", "calories",
    "fat.percent", "average.cadence", "average.stride.length",
    "running.index", "trainload", "ascent", "descent", "notes", "height",
    "weight", "max.HR", "sit.HR", "VO2max"
  )
  # combine date + start into one timestamp, express the duration in seconds
  myinfo$when <- as.POSIXct(
    paste(myinfo$date, myinfo$start, sep = " "),
    format = "%d.%m.%Y %H:%M:%S"
  )
  myinfo$duration.s <- period_to_seconds(hms(myinfo$duration))
  # the separate date and start columns are now redundant
  myinfo$date <- NULL
  myinfo$start <- NULL

  # sport type (first column of the summary) is used in the output file names
  sport_type <- paste(myinfo[1, 1])

  # Keep only the date-time part of the export name for the output file
  # names. The stamp starts at the first "YYYY-MM-DD" found in `x`; when no
  # such pattern exists, fall back to the historical fixed offset (character
  # 13, i.e. after a 12-character name prefix). At most 38 characters are
  # kept, as before.
  stamp_start <- as.integer(regexpr("[0-9]{4}-[0-9]{2}-[0-9]{2}", x))
  if (is.na(stamp_start) || stamp_start < 1L) {
    stamp_start <- 13L
  }
  stamp <- substr(x, stamp_start, stamp_start + 37L)

  write.csv(
    myinfo,
    paste0(path_out, paste(stamp, sport_type, "Info.csv", sep = "_")),
    row.names = FALSE
  )

  #### CSV SAMPLES -> "mycsv" (one row per sample) ####
  cat("..........Preparing data for each training", "\n")
  # the first two lines of the file are the summary header and its values
  mycsv <- read.csv(csv_file, skip = 2)
  # drop the sample rate and "X" column information
  mycsv$Sample.rate <- NULL
  mycsv$X <- NULL
  names(mycsv) <- c(
    "time", "HR", "speed.kmh", "pace.minkm", "cadence", "altitude",
    "stride.length", "distance", "temperature", "power"
  )
  mycsv$time.s <- period_to_seconds(hms(mycsv$time))
  # pace.minkm is "MM:SS"; prefix an hour field so hms() can parse it, then
  # convert the seconds to decimal minutes
  mycsv$pace <- period_to_seconds(hms(paste("00", mycsv$pace.minkm))) / 60

  #### TCX TRACKPOINTS -> "mydf" ####
  cat("..........Reading and parsing the tcx - that takes some time, it's", sport_type, "\n")
  doc <- xmlParse(tcx_file)
  nodes <- getNodeSet(doc, "//ns:Trackpoint", "ns")
  # Time consuming part. as.data.frame() applied to a function dispatches to
  # plyr's method for functions; the "value." prefix of the resulting column
  # names (handled below) comes from that wrapper.
  mydf <- plyr::ldply(nodes, as.data.frame(xmlToList))
  # drop the sensor information
  mydf$value.SensorState <- NULL

  # human readable column names
  new_names <- c(
    value.Time = "when",
    value.AltitudeMeters = "altitude",
    value.DistanceMeters = "distance",
    value.Value = "HR",
    value.Position.LatitudeDegrees = "lat",
    value.Position.LongitudeDegrees = "long",
    value.Cadence = "cadence"
  )
  known <- names(mydf) %in% names(new_names)
  names(mydf)[known] <- unname(new_names[names(mydf)[known]])

  # Strip the last five characters of the timestamp before parsing.
  # NOTE(review): presumably the ".000Z" suffix - confirm. No tz is given,
  # so the value is interpreted in the session time zone.
  mydf$when <- gsub('.{5}$', '', mydf$when)
  mydf$when <- as.POSIXct(mydf$when, format = "%Y-%m-%dT%H:%M:%S")

  # Convert each measurement column that is actually present. Every column
  # is checked on its own: position can be missing while altitude exists
  # (and vice versa), and assigning a zero-length vector to a data frame
  # column is an error.
  for (col in intersect(c("altitude", "lat", "long", "distance"), names(mydf))) {
    mydf[[col]] <- as.numeric(sub(",", ".", mydf[[col]], fixed = TRUE))
  }
  for (col in intersect(c("HR", "cadence"), names(mydf))) {
    mydf[[col]] <- as.integer(sub(",", ".", mydf[[col]], fixed = TRUE))
  }

  #### MERGE MYCSV + MYDF ####
  cat("..........Your training data is almost ready", "\n")
  # Either source may carry surplus trailing rows (both cases have been
  # observed); cut both down to the common length.
  n_common <- min(nrow(mydf), nrow(mycsv))
  mydf <- mydf[seq_len(n_common), , drop = FALSE]
  mycsv <- mycsv[seq_len(n_common), , drop = FALSE]

  # remove the columns that would duplicate the tcx data
  mycsv$HR <- NULL
  mycsv$altitude <- NULL
  mycsv$distance <- NULL
  mycsv$cadence <- NULL

  mymerge <- cbind(mydf, mycsv)
  write.csv(
    mymerge,
    paste0(path_out, paste(stamp, sport_type, "data.csv", sep = "_")),
    row.names = FALSE
  )
  cat("..........", x, "is processed and ready for further analysis", "\n")
  invisible(NULL)
}
# Example export base names (without extension) to pass to PolarRead().
# Both assignments target the same variable, so `x` ends up holding the
# second name.
x <- "hana_Kysela_2016-08-19_14-08-37"
x <- "hana_Kysela_2015-09-12_14-30-53"