-
Notifications
You must be signed in to change notification settings - Fork 1
/
telco.jl
151 lines (117 loc) · 5.03 KB
/
telco.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
using MLJFlux, Flux, MLJ, DataFrames, CSV, StatsBase, Dates
import JSON
using Plots, StatsPlots
# Select the Plotly backend of Plots for every figure produced below.
plotly()
# Value passed as the `size` keyword to the larger figures in this script.
figuresize = (1600, 1200)
# Load the churn dataset into a DataFrame; the path is relative to the working directory.
origindata = CSV.read("data/telco-customer-churn/data.csv", DataFrame)
# MODULE Data cleaning
"""
    findNullData(dataframe::DataFrame)

Print one line per column of `dataframe` showing the column name and how many of its
entries are `missing`. Returns `nothing`.
"""
function findNullData(dataframe::DataFrame)
    foreach(names(dataframe)) do name
        nmissing = count(ismissing, dataframe[!, name])
        println("$name: \t $nmissing")
    end
    return nothing
end
# Print the per-column count of missing values in the raw data.
findNullData(origindata)
# Compute the table schema of the raw data; the result is not stored.
schema(origindata)
"""
    transformTotalCharges(dataframe::DataFrame)

Convert the `:TotalCharges` column of `dataframe` to `Float64` in place and return
`dataframe`.

Rows whose `:TotalCharges` text is blank (empty or whitespace only) receive the row's
`:MonthlyCharges` value instead, and their `:tenure` is set to `1`. All other entries
are parsed with `parse(Float64, ...)`.

If `:TotalCharges` is already numeric the data frame is returned unchanged, so calling
the function again on cleaned data is safe.
"""
function transformTotalCharges(dataframe::DataFrame)
    totals = dataframe[!, :TotalCharges]
    # Already converted (e.g. a repeated call): parsing numbers as text would throw a
    # MethodError, and there is nothing left to clean.
    eltype(totals) <: Union{Missing,Real} && return dataframe

    monthly = dataframe[!, :MonthlyCharges]
    # A blank entry marks a row without a recorded total.
    isblank = Bool[all(isspace, total) for total in totals]
    # Build the numeric column in one pass; blank rows fall back to the monthly charge.
    dataframe[!, :TotalCharges] = Float64[
        parse(Float64, blank ? string(charge) : total) for
        (total, charge, blank) in zip(totals, monthly, isblank)
    ]
    dataframe[!, :tenure][isblank] .= 1
    return dataframe
end
# Clean `TotalCharges` (and the matching `tenure` rows) of the loaded data in place.
transformTotalCharges(origindata)
# MODULE Visual analysis
"""
    plotChurn(dataframe::DataFrame)

Display two figures for the `:Churn` column of `dataframe`: a pie chart of the
"Yes"/"No" shares and a bar chart of the absolute counts.

A label that does not occur in the column is counted as zero.
"""
function plotChurn(dataframe::DataFrame)
    churn = dataframe[!, :Churn]
    labels = ["Yes", "No"]
    # `count` yields 0 for an absent label, whereas indexing a count map would throw
    # a KeyError.
    tallies = [count(isequal(label), churn) for label in labels]
    shares = tallies ./ sum(tallies)
    display(pie(labels, shares, aspect_ratio = :equal))
    return display(bar(labels, tallies, size = figuresize))
end
# Show the overall churn shares and counts.
plotChurn(origindata)
# DONE Customer attribute analysis
"""
    plotPercentages(dataframe::DataFrame, feature::Symbol[, ymatrix::Matrix{Float64}])

Display a grouped bar chart of the "Yes"/"No" churn shares for every distinct value of
`feature` in `dataframe`.

# Arguments
- `dataframe`: table containing the `feature` column and a `:Churn` column.
- `feature`: column whose distinct values form the groups on the x axis.
- `ymatrix`: work matrix with one row per group and two columns; it is overwritten
  with the `[yes, no]` shares. When omitted, a matrix of the right size is allocated.

# Throws
- `DimensionMismatch` if `ymatrix` is not `(number of groups) × 2`.
"""
function plotPercentages(
    dataframe::DataFrame,
    feature::Symbol,
    ymatrix::Matrix{Float64} = ones(length(unique(dataframe[!, feature])), 2)
)
    groups = groupby(select(dataframe, [feature, :Churn]), feature)
    ngroups = length(groups)
    # Check up front: surplus rows would be plotted as spurious bars, and too few rows
    # would fail halfway through the loop.
    size(ymatrix) == (ngroups, 2) || throw(
        DimensionMismatch(
            "ymatrix has size $(size(ymatrix)), expected ($ngroups, 2) for $feature",
        ),
    )

    # Tick labels: the feature value shared by every row of a group.
    xs = Any[first(group[!, feature]) for group in groups]
    for (row, group) in enumerate(groups)
        churn = group[!, :Churn]
        yescount = count(isequal("Yes"), churn)
        nocount = count(isequal("No"), churn)
        total = yescount + nocount
        ymatrix[row, :] = [yescount / total, nocount / total]
    end
    return display(
        groupedbar(ymatrix, xticks = (1:length(xs), xs), label = ["Yes" "No"]),
    )
end
# Churn shares per category of each customer attribute; every matrix supplies one row
# per expected category value (two here) and two columns for the Yes/No shares.
plotPercentages(origindata, :SeniorCitizen, ones((2, 2)))
plotPercentages(origindata, :gender, ones((2, 2)))
plotPercentages(origindata, :Partner, ones((2, 2)))
plotPercentages(origindata, :Dependents, ones((2, 2)))
# Density of tenure, drawn separately for each churn label.
density(origindata.tenure, group = origindata.Churn, size = figuresize) |> display
# DONE Service attribute analysis
# These two features are given three-row matrices, i.e. three categories are expected.
plotPercentages(origindata, :MultipleLines, ones((3, 2)))
plotPercentages(origindata, :InternetService, ones((3, 2)))
"""
    plotPaperlessBillingChurn(dataframe::DataFrame)

For every distinct `:PaperlessBilling` value in `dataframe`, display a bar chart of how
the churned customers (`:Churn == "Yes"`) of that group are distributed over the
contract types found in `:Contract`.

Works for any number of contract types. A group without churned customers is drawn
with all-zero bars. Returns `nothing`.
"""
function plotPaperlessBillingChurn(dataframe::DataFrame)
    contracts = unique(dataframe[!, :Contract])
    groups = groupby(
        select(dataframe, [:PaperlessBilling, :Contract, :Churn]), :PaperlessBilling
    )
    for group in groups
        # Read the group's key before narrowing to churned rows: the churned subset
        # may be empty, and `first` on an empty column throws.
        paperlessbilling = first(group[!, :PaperlessBilling])
        churned = group[isequal.(group[!, :Churn], "Yes"), :Contract]
        tallies = [count(isequal(contract), churned) for contract in contracts]
        total = sum(tallies)
        # Avoid 0/0 when nobody in this group churned.
        ys = total == 0 ? zeros(length(tallies)) : tallies ./ total
        bar(contracts, ys, title = "PaperlessBilling = $paperlessbilling") |> display
    end
    return nothing
end
# Show, per PaperlessBilling value, how churned customers split across contract types.
plotPaperlessBillingChurn(origindata)
"""
    plotNumberOfCustomer(dataframe::DataFrame)

Display a grouped bar chart of customer counts per service column, restricted to rows
whose `:InternetService` is not `"No"`.

For each service column the chart shows the number of `"Yes"` and `"No"` entries. For
`"MultipleLines"` a third bar shows the number of `"No phone service"` entries; that
third value is zero for every other column.
"""
function plotNumberOfCustomer(dataframe::DataFrame)
    columns = [
        "PhoneService",
        "MultipleLines",
        "OnlineSecurity",
        "OnlineBackup",
        "DeviceProtection",
        "TechSupport",
        "StreamingTV",
        "StreamingMovies",
    ]
    # The subset of internet customers is the same for every column, so build it once
    # instead of re-filtering inside the loop.
    internet = filter(row -> row.InternetService != "No", dataframe)
    ymatrix = zeros(length(columns), 3)
    for (row, column) in enumerate(columns)
        entries = internet[!, column]
        ymatrix[row, 1] = count(isequal("Yes"), entries)
        ymatrix[row, 2] = count(isequal("No"), entries)
        # Only MultipleLines is tallied for the "No phone service" category.
        if column == "MultipleLines"
            ymatrix[row, 3] = count(isequal("No phone service"), entries)
        end
    end
    # The third series counts "No phone service" entries, so it gets its own legend
    # entry rather than repeating "No Service".
    return display(
        groupedbar(
            ymatrix,
            xticks = (1:length(columns), columns),
            label = ["Has Service" "No Service" "No Phone Service"],
            size = figuresize,
        ),
    )
end
# Show how many internet customers have or lack each service.
plotNumberOfCustomer(origindata)
"""
    plotNumberOfChurnCustomer(dataframe::DataFrame)

Display a grouped bar chart of churned-customer counts per service column, restricted
to rows with `:InternetService != "No"` and `:Churn == "Yes"`.

For each service column the chart shows how many of those customers have the service
(`"Yes"`) and how many do not (`"No"`).
"""
function plotNumberOfChurnCustomer(dataframe::DataFrame)
    columns = [
        "PhoneService",
        "MultipleLines",
        "OnlineSecurity",
        "OnlineBackup",
        "DeviceProtection",
        "TechSupport",
        "StreamingTV",
        "StreamingMovies",
    ]
    # The churned internet customers are the same for every column, so filter once
    # instead of once per loop iteration.
    churned = filter(
        row -> row.InternetService != "No" && row.Churn == "Yes", dataframe
    )
    ymatrix = zeros(length(columns), 2)
    for (row, column) in enumerate(columns)
        entries = churned[!, column]
        # has service but churned
        ymatrix[row, 1] = count(isequal("Yes"), entries)
        # has no service but churned
        ymatrix[row, 2] = count(isequal("No"), entries)
    end
    return display(
        groupedbar(
            ymatrix,
            xticks = (1:length(columns), columns),
            label = ["Has Service" "No Service"],
            size = figuresize,
        ),
    )
end
# Show how many churned internet customers have or lack each service.
plotNumberOfChurnCustomer(origindata)
# DONE Contract attribute analysis
# Churn shares per payment method; the four-row matrix means four methods are expected.
plotPercentages(origindata, :PaymentMethod, ones((4, 2)))
# Densities of the two charge columns, drawn separately for each churn label.
density(origindata.MonthlyCharges, group = origindata.Churn, size = figuresize) |> display
# TotalCharges is numeric here because `transformTotalCharges` was applied earlier in this script.
density(origindata.TotalCharges, group = origindata.Churn, size = figuresize) |> display