-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathB题.r
180 lines (175 loc) · 6.43 KB
/
B题.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
library("trend")
library("plyr")
library("forecast")
library("igraph")
library("fitdistrplus")
library("logspline")
# ---- Data loading ----
# Column 1 of the shipment table is parsed as a Date; Day and Month are
# derived from it as integers.
attachment1 <- read.csv(file = "附件1.csv", encoding = "UTF-8")
attachment1[[1]] <- as.Date(attachment1[[1]])
attachment1$Day <- as.integer(format(attachment1[[1]], "%d"))
attachment1$Month <- as.integer(format(attachment1[[1]], "%m"))
# ---- Question 1 (done) ----
# Pre-processed table with one column per site letter, read for the
# Mann-Kendall tests.
trend_pre <- read.csv(file = "附件1-趋势预处理.csv", encoding = "UTF-8")
# Outbound: for every site letter A-Z, the PCS values of the rows where that
# site is the Delivering site, and their total.
# (unused placeholders kept from the original: deliver_diff, deliver_trend)
deliver <- lapply(
  setNames(LETTERS, LETTERS),
  function(site) attachment1[attachment1$Delivering == site, "PCS"]
)
deliver_sum <- lapply(deliver, sum)
# Inbound: the same aggregation, keyed on the Receiving site.
# (unused placeholders kept from the original: reveive_diff, receive_trend)
receive <- lapply(
  setNames(LETTERS, LETTERS),
  function(site) attachment1[attachment1$Receiving == site, "PCS"]
)
receive_sum <- lapply(receive, sum)
# Trend: mk.test (package "trend") on each site's column of trend_pre;
# the p-value and the test statistic are kept in separate named lists.
trend <- lapply(
  setNames(LETTERS, LETTERS),
  function(site) mk.test(trend_pre[, site])
)
trend_p <- lapply(trend, function(res) res[["p.value"]])
trend_statistic <- lapply(trend, function(res) res[["statistic"]])
# "Correlation" indicator (original label): the same test applied to the
# first differences of each site's series.
cov <- lapply(
  setNames(LETTERS, LETTERS),
  function(site) mk.test(diff(trend_pre[, site]))
)
cov_p <- lapply(cov, function(res) res[["p.value"]])
cov_statistic <- lapply(cov, function(res) res[["statistic"]])
# Entropy weight method: build the indicator table, one row per site and
# four columns (total sent, total received, trend statistic, statistic of
# the differenced series).
topsis <- as.data.frame(
  cbind(
    unlist(deliver_sum),
    unlist(receive_sum),
    unlist(trend_statistic),
    unlist(cov_statistic)
  )
)
# Missing indicator values are replaced by 0.
topsis[is.na(topsis)] <- 0
# Entropy of one positive ("larger is better") indicator, as used by the
# entropy weight method.
#
# x:       numeric vector (or a list / data-frame column, flattened with
#          unlist()).
# returns: a single number: the Shannon entropy of the min-max normalised
#          shares of x, divided by log(length(x)).
#
# A constant indicator (max == min, which includes a single value) carries no
# discriminating information. The original formula divided by zero there and
# returned NaN/NA, which in turn made every entropy weight NA. Such an
# indicator now gets the maximum entropy 1, i.e. weight 0.
# NA values in x still propagate to an NA result, as before.
entropy_positive <- function(x) {
  x <- unlist(x)
  value_range <- max(x) - min(x)
  if (isTRUE(value_range == 0)) {
    return(1)
  }
  y <- (x - min(x)) / value_range
  p <- y / sum(y)
  # 0 * log(0) is taken as 0 (its limit) instead of NaN.
  terms <- ifelse(p == 0, 0, p * log(p))
  -sum(terms) / log(length(x))
}
# Standardisation: column-wise scale() of the indicator table.
scaled_data <- scale(topsis)
# Entropy of every indicator column (plyr::colwise applies the function per
# column); each weight is (1 - entropy), normalised so the weights sum to 1.
entropy_data <- colwise(entropy_positive)(topsis)
entropy_weight <- (1 - entropy_data) / sum(1 - entropy_data)
write.csv(x = scaled_data, file = "附件1-标准化.csv")
write.csv(x = entropy_weight, file = "附件1-权重.csv")
# Results: see 第一问结果.py
# ---- Question 2 (done) ----
# Hold-out check of the GLM: each row goes to the training set with
# probability 0.75 and to the test set otherwise.
# The mask must have one entry per ROW. length() of a data frame is its
# number of columns, which produced a short mask that was recycled over the
# rows instead of a random split, so nrow() is used here.
index <- sample(
  c(TRUE, FALSE),
  nrow(attachment1),
  replace = TRUE,
  prob = c(0.75, 0.25)
)
train <- attachment1[index, ]
test <- attachment1[!index, ]
fit <- glm(PCS ~ Month + Day + Delivering + Receiving, data = train)
summary(fit)
plot(fit)
result <- predict(fit, newdata = test, type = "response")
# Negative predicted quantities are clamped to 0.
result[result < 0] <- 0
# Stratified fit: one GLM (PCS ~ Month + Day) per Delivering/Receiving pair
# that occurs in the data. Hold-out predictions are stored in `result` under
# the key "<from> <to>".
result <- list()
for (i in LETTERS) {
  for (j in LETTERS) {
    route <- attachment1[attachment1$Delivering == i, ]
    route <- route[route$Receiving == j, ]
    if (nrow(route) > 0) {
      # One mask entry per row of the route. length(route) is the column
      # count, which gave a mask of the wrong length (recycled for long
      # routes, NA rows for short ones).
      index <- sample(
        c(TRUE, FALSE),
        nrow(route),
        replace = TRUE,
        prob = c(0.75, 0.25)
      )
      train <- route[index, ]
      test <- route[!index, ]
      # A small route can end up with no training rows; glm() would stop
      # with an error and abort the whole loop, so such a route is skipped.
      if (nrow(train) == 0) {
        next
      }
      fit <- glm(PCS ~ Month + Day, data = train)
      result[[paste(i, j)]] <- predict(fit, newdata = test, type = "response")
    }
  }
}
# Result output: refit on all rows and predict the eight requested routes
# (four on April 18, four on April 19).
fit <- glm(PCS ~ Month + Day + Delivering + Receiving, data = attachment1)
Month <- rep(4, 8)
Day <- rep(c(18, 19), each = 4)
Delivering <- c("M", "Q", "K", "G", "V", "A", "D", "L")
Receiving <- c("U", "V", "L", "V", "G", "Q", "A", "K")
goal_data <- data.frame(Month, Day, Delivering, Receiving)
result <- predict(fit, newdata = goal_data, type = "response")
# Rows dated 2019-04-17 serve as the template for the two forecast days.
# The date column is addressed by position, as in the loading code, instead
# of by the name "X.U.FEFF.Date". That name only exists when the CSV's
# byte-order mark is mangled into the header in that particular way; on other
# platforms the lookup yields NULL and silently selects zero rows.
goal4.17 <- attachment1[attachment1[[1]] == "2019-4-17", ]
goal4.18 <- goal4.17
goal4.18$Day <- 18
goal4.19 <- goal4.17
goal4.19$Day <- 19
# Totals: sum of the predicted PCS over the template rows for each day.
sum4.18 <- sum(predict(fit, newdata = goal4.18, type = "response"))
sum4.19 <- sum(predict(fit, newdata = goal4.19, type = "response"))
result
sum4.18
sum4.19
# ---- Question 3 (done) ----
# Column 1 of the second table is parsed as a Date; Day, Month and Year are
# derived from it as integers.
attachment2 <- read.csv(file = "附件2.csv", encoding = "UTF-8")
attachment2[[1]] <- as.Date(attachment2[[1]])
attachment2$Day <- as.integer(format(attachment2[[1]], "%d"))
attachment2$Month <- as.integer(format(attachment2[[1]], "%m"))
attachment2$Year <- as.integer(format(attachment2[[1]], "%Y"))
# Main processing: hold-out check of the GLM on attachment2, with a 0.75 /
# 0.25 random split by row.
# nrow() gives one mask entry per row. length() of a data frame is its column
# count, which produced a short mask recycled over the rows.
index <- sample(
  c(TRUE, FALSE),
  nrow(attachment2),
  replace = TRUE,
  prob = c(0.75, 0.25)
)
train <- attachment2[index, ]
test <- attachment2[!index, ]
fit <- glm(PCS ~ Year + Month + Day + Delivering + Receiving, data = train)
result <- predict(fit, newdata = test, type = "response")
# Negative predicted quantities are clamped to 0.
result[result < 0] <- 0
# Time series: the pre-processed table is turned into a multivariate ts.
# Author's note: forecasts for another date can be obtained by truncating
# the csv file.
time_pre <- read.csv(file = "附件2-时间序列预处理.csv", encoding = "UTF-8")
time2 <- ts(data = time_pre, start = c(2020, 119), frequency = 273)
# Forecast whether a route is open or closed over the next two periods
# (days, per the original author's note).
#
# route:   column name of the global series `time2`, e.g. "IS".
# returns: the forecast object for horizon 2 from an ETS model fitted to
#          that column.
predictRouteAccess <- function(route) {
  route_series <- time2[, route]
  forecast(ets(route_series), h = 2)
}
# Two-step forecasts for the ten routes of interest. The calls are kept as
# separate top-level expressions so that each forecast is auto-printed when
# the script is run.
predictRouteAccess("IS")
predictRouteAccess("MG")
predictRouteAccess("SQ")
predictRouteAccess("VA")
predictRouteAccess("YL")
predictRouteAccess("DR")
predictRouteAccess("JK")
predictRouteAccess("QO")
predictRouteAccess("UO")
predictRouteAccess("YW")
# Quantity forecast, assuming the route can ship normally: refit the GLM on
# all rows of attachment2 and predict the ten requested routes (five on
# 2023-04-28, five on 2023-04-29).
fit <- glm(
  formula = PCS ~ Year + Month + Day + Delivering + Receiving,
  data = attachment2
)
Year <- rep(2023, 10)
Month <- rep(4, 10)
Day <- rep(c(28, 29), each = 5)
Delivering <- c("I", "M", "S", "V", "Y", "D", "J", "Q", "U", "Y")
Receiving <- c("S", "G", "Q", "A", "L", "R", "K", "O", "O", "W")
goal_data <- data.frame(Year, Month, Day, Delivering, Receiving)
result <- predict(fit, newdata = goal_data, type = "response")
# Question 4: see 第四问-最短路.py
# ---- Question 5 ----
# Fixed part: column-wise standard deviation of the quarterly minima.
min_pre <- read.csv(file = "附件2-季度最小值.csv", encoding = "UTF-8")
standard <- apply(X = min_pre, MARGIN = 2, FUN = sd)
# Author's note: 42 × len
# Non-fixed part: per-quarter and overall tables, each shifted down by the
# constant 42 (presumably the fixed amount from the note above; confirm).
distribution_2022_3 <- read.csv(
  file = "附件2-分季度分布/20223季度.csv",
  encoding = "UTF-8"
) - 42
distribution_2023_1 <- read.csv(
  file = "附件2-分季度分布/20231季度.csv",
  encoding = "UTF-8"
) - 42
distribution_all <- read.csv(file = "附件2-全分布.csv", encoding = "UTF-8") - 42
# Extract the column(s) named `route` from a distribution table.
#
# route:   column name (or names) to extract, e.g. "VN".
# data:    table to read from; defaults to the global `distribution_all`.
# returns: the selected column in its original row order (nothing is sorted).
getAll <- function(route, data = distribution_all) {
  data[, route]
}
# Index plots of four routes from the overall table (values already shifted
# by -42 when the table was loaded).
plot(getAll("VN"))
plot(getAll("VQ"))
plot(getAll("JI"))
plot(getAll("OG"))
# The same routes restricted to a single quarter: VN and VQ from the 2022 Q3
# table, JI and OG from the 2023 Q1 table.
plot(getAll("VN", distribution_2022_3))
plot(getAll("VQ", distribution_2022_3))
plot(getAll("JI", distribution_2023_1))
plot(getAll("OG", distribution_2023_1))
# descdist (package fitdistrplus) on the non-missing values of each route,
# taken from the overall table.
descdist(as.numeric(na.omit(distribution_all[, "VN"])))
descdist(as.numeric(na.omit(distribution_all[, "VQ"])))
descdist(as.numeric(na.omit(distribution_all[, "JI"])))
descdist(as.numeric(na.omit(distribution_all[, "OG"])))
# The remaining data were processed in Excel.