-
Notifications
You must be signed in to change notification settings - Fork 1
/
interpolate.R
45 lines (36 loc) · 1.43 KB
/
interpolate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
library("tidyverse")
library("lubridate")
# Directory holding the competition files.
# NOTE(review): absolute, machine-specific path -- adjust before running on
# another machine.
data_dir <- "/Users/krissankaran/.kaggle/competitions/short-term-load-forecasting-challenge"

# Read the training data, sort it chronologically, and attach two time
# columns: `datetime` (parsed with lubridate) and `numeric_datetime` (the same
# instant as a plain number, used later as the interpolation axis).
x <- read_csv(file.path(data_dir, "train.csv")) %>%
  arrange(Month, DayOfTheMonth, Hour, Minute) %>%
  mutate(
    # Assemble a "2019-MM-DD HH:MM:00" string; the year is hard-coded.
    # `side` is spelled out in full rather than abbreviated to "l": newer
    # stringr releases validate it by exact match and reject abbreviations.
    datetime = sprintf(
      "2019-%s-%s %s:%s:00",
      str_pad(Month, 2, side = "left", pad = "0"),
      str_pad(DayOfTheMonth, 2, side = "left", pad = "0"),
      str_pad(Hour, 2, side = "left", pad = "0"),
      str_pad(Minute, 2, side = "left", pad = "0")
    )
  ) %>%
  # Drops every row that falls on the 29th of any month, before parsing.
  # NOTE(review): presumably meant to avoid the non-existent 2019-02-29;
  # confirm that discarding the 29th of the other months is intended.
  filter(DayOfTheMonth != 29) %>%
  mutate(
    datetime = as_datetime(datetime),
    numeric_datetime = as.numeric(datetime)
  )
# Regular grid over the observed time range, one point every 6000 units of
# `numeric_datetime`.
times <- seq(min(x$numeric_datetime), max(x$numeric_datetime), 6000)

# Resample every column of `x` onto the grid. method = "constant" yields a
# step function between observations; ties = "mean" averages the values that
# share the same timestamp.
interpolated <- lapply(
  x,
  function(column) {
    approx(x$numeric_datetime, column, times, ties = "mean", method = "constant")$y
  }
)

# The resampled `datetime` column is replaced by the grid itself.
interpolated$datetime <- times
interpolated <- as.data.frame(interpolated)
# Scatter plot of the resampled Forecast series, restricted to grid points
# strictly between the two hard-coded numeric timestamps.
interpolated %>%
  filter(datetime > 1547869367, datetime < 1549446506) %>%
  ggplot(aes(x = datetime, y = Forecast)) +
  geom_point()
## ggplot(x %>% filter(numeric_datetime < 1549446506, numeric_datetime > 1547869367)) +
## geom_point(aes(x = datetime, y = Forecast))
## ggplot(x) +
## geom_point(
## aes(x = Hour, y = Forecast),
## position = position_jitter(w = 0.1),
## alpha = 0.2
## ) +
## facet_grid(Month ~ DayOfTheWeek)
## plot(x$AirLoad1, x$Forecast)
# Save the resampled table as CSV; the relative path resolves against the
# current working directory.
interpolated %>%
  write_csv("train_interp.csv")