-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDV Assignment-11_R file-1 (1).R
185 lines (135 loc) · 6.46 KB
/
DV Assignment-11_R file-1 (1).R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
library(dplyr)
library(tidyverse)
library(ggplot2)
# Load the customer data set from CSV; the first row holds the column names.
data <- read.csv(
  "/Users/sailokeshsiddanathi/Documents/dvp.csv",
  header = TRUE,
  sep = ","
)
head(data, 5)

# Recode Response as an integer flag (0 when the value is "No", 1 otherwise)
# and expose it under the name Engaged for the rest of the script.
data <- data %>%
  mutate(Response = as.integer(ifelse(Response == "No", 0, 1))) %>%
  rename(Engaged = Response)
# Overall engagement (conversion) rate: number of customers and share of all
# customers (in percent) for each Engaged value (0 = not engaged, 1 = engaged).
# This table used to be computed twice in a row with identical code; it is
# now computed once.
EngagementRate <- data %>%
  group_by(Engaged) %>%
  summarize(Count = n()) %>%
  mutate(Percentage = Count / nrow(data) * 100)

# Transpose the table so that every Engaged value becomes a column and the
# rows are Count and Percentage.
transposed <- t(EngagementRate)
colnames(transposed) <- EngagementRate$Engaged
# Remove the first row (the Engaged values, now used as column names).
# drop = FALSE keeps the result a matrix even if only one Engaged value
# is present in the data.
transposed <- transposed[-1, , drop = FALSE]
transposed
# Engagement by sales channel: number of customers for every
# (Engaged, Sales.Channel) combination, ordered by channel.
# .groups = "drop" returns an ungrouped table; without it the result stays
# grouped by Engaged and summarize() prints a regrouping message.
SalesChannel <- data %>%
  group_by(Engaged, Sales.Channel) %>%
  summarize(Count = n(), .groups = "drop") %>%
  arrange(Sales.Channel)
SalesChannel

# Plot size hint; presumably read by notebook front ends that honour the
# repr.* options (e.g. Jupyter/IRkernel) -- TODO confirm.
options(repr.plot.width = 15, repr.plot.height = 8)

# One pie chart per Engaged value: bars filled to 100% (position_fill) and
# wrapped around the y axis with coord_polar; slice labels show raw counts.
ggplot(SalesChannel, aes(x = "", y = Count, fill = Sales.Channel)) +
  geom_bar(stat = "identity", position = position_fill()) +
  geom_text(aes(x = 1.15, label = Count), position = position_fill(vjust = 0.5)) +
  coord_polar("y") +
  facet_wrap(~Engaged) +
  ggtitle("Sales Channel_SiddanaEDA (0: Not Engaged, 1: Engaged)") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 18),
    plot.title = element_text(size = 20)
  )
# Observation (from the original analysis): more than half of the engaged
# customers came from Agents, while non-engaged customers were distributed
# more evenly across all 4 channels.
# Conversion rate per State: customer count, engaged-customer count, and the
# engaged share in percent.
conversionsState <- data %>%
  group_by(State) %>%
  summarise(
    TotalCount = n(),
    NumConversions = sum(Engaged),
    ConversionRate = NumConversions / TotalCount * 100
  )
conversionsState
# Conversion rate per Education level: customer count, engaged-customer
# count, and the engaged share in percent.
conversionsEdu <- data %>%
  group_by(Education) %>%
  summarise(
    TotalCount = n(),
    NumConversions = sum(Engaged),
    ConversionRate = NumConversions / TotalCount * 100
  )
conversionsEdu

# Bar chart of the conversion rate for each Education level.
ggplot(conversionsEdu, aes(x = Education, y = ConversionRate)) +
  geom_col(width = 0.5, fill = "Blue") +
  ggtitle("Conversion Rates by Education_SiddanaEDA") +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 15),
    plot.title = element_text(size = 15)
  )
# Observation (from the original analysis): customers with Doctoral and
# Master's degrees tend to engage more.
# Employment status ----
# Conversion rate per EmploymentStatus: customer count, engaged-customer
# count, and the engaged share in percent.
conversionsEmp <- data %>%
  group_by(EmploymentStatus) %>%
  summarise(
    TotalCount = n(),
    NumConversions = sum(Engaged),
    ConversionRate = NumConversions / TotalCount * 100
  )
conversionsEmp

# Bar chart of the conversion rate for each employment status.
ggplot(conversionsEmp, aes(x = EmploymentStatus, y = ConversionRate)) +
  geom_col(width = 0.5, fill = "Red") +
  ggtitle("Conversion Rates by Employment Status") +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title = element_text(size = 18),
    plot.title = element_text(size = 18)
  )
# Observation (from the original analysis): the engagement rate among retired
# customers is much higher compared to other groups.
# Gender ----
# Conversion rate per Gender: customer count, engaged-customer count, and the
# engaged share in percent.
conversionsGender <- data %>%
  group_by(Gender) %>%
  summarise(
    TotalCount = n(),
    NumConversions = sum(Engaged),
    ConversionRate = NumConversions / TotalCount * 100
  )
conversionsGender

# Bar chart of the conversion rate for each gender (narrow bars).
ggplot(conversionsGender, aes(x = Gender, y = ConversionRate)) +
  geom_col(width = 0.2, fill = "darkgreen") +
  ggtitle("Conversion Rates by Gender") +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title = element_text(size = 18),
    plot.title = element_text(size = 18)
  )
# Observation (from the original analysis): no significant difference in the
# engagement rate between male and female customers.
# Location code ----
# Conversion rate per Location.Code: customer count, engaged-customer count,
# and the engaged share in percent. (The table itself is not printed here.)
conversionsLocation <- data %>%
  group_by(Location.Code) %>%
  summarise(
    TotalCount = n(),
    NumConversions = sum(Engaged),
    ConversionRate = NumConversions / TotalCount * 100
  )

# Bar chart of the conversion rate for each location code.
ggplot(conversionsLocation, aes(x = Location.Code, y = ConversionRate)) +
  geom_col(width = 0.5, fill = "Yellow") +
  ggtitle("Conversion Rates by Location Code_Siddanas") +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title = element_text(size = 18),
    plot.title = element_text(size = 18)
  )
# Observation (from the original analysis): customers with the location code
# "suburban" have a 2 times higher engagement rate.
# Marital status ----
# Conversion rate per Marital.Status: customer count, engaged-customer count,
# and the engaged share in percent.
conversionsMarital <- data %>%
  group_by(Marital.Status) %>%
  summarize(TotalCount = n(), NumConversions = sum(Engaged)) %>%
  mutate(ConversionRate = NumConversions / TotalCount * 100)
conversionsMarital

# Bar chart of the conversion rate for each marital status.
# The plot is started with ggplot(); ggplot2 is the package name, not a
# function, so calling ggplot2(...) fails with "could not find function".
ggplot(conversionsMarital, aes(x = Marital.Status, y = ConversionRate)) +
  geom_bar(width = 0.5, stat = "identity", fill = "magenta") +
  labs(title = "Conversion Rates by Marital Status") +
  theme(
    axis.text.x = element_text(size = 18),
    axis.text.y = element_text(size = 18),
    axis.title = element_text(size = 20),
    plot.title = element_text(size = 22)
  )
# Observation (from the original analysis): divorced customers are more
# likely to engage.
# Regression analysis (continuous variables) ----
# Keep only the numeric columns of data; Engaged is an integer column and is
# therefore retained as the response.
continuous <- Filter(is.numeric, data)

# Logistic regression of Engaged on every other numeric column.
logit.fit <- glm(Engaged ~ ., data = continuous, family = binomial)
summary(logit.fit)
# Logistic regression of Engaged on the categorical columns only; each column
# is wrapped in factor() inside the formula. The result overwrites the
# previous logit.fit.
logit.fit <- glm(
  Engaged ~ factor(Education) + factor(Gender) + factor(State) +
    factor(Coverage) + factor(EmploymentStatus) +
    factor(Location.Code) + factor(Marital.Status) +
    factor(Policy.Type) + factor(Policy) + factor(Renew.Offer.Type) +
    factor(Sales.Channel) + factor(Vehicle.Class) +
    factor(Vehicle.Size),
  data = data,
  family = binomial
)
summary(logit.fit)
# Regression analysis (continuous + categorical variables) ----
# Categorical columns to append to the numeric-only table, in this order.
categorical_cols <- c(
  "Education", "Gender", "State", "Coverage", "EmploymentStatus",
  "Location.Code", "Marital.Status", "Policy.Type", "Policy",
  "Renew.Offer.Type", "Sales.Channel", "Vehicle.Class", "Vehicle.Size"
)

# Copy each categorical column from data into continuous as a factor.
continuous[categorical_cols] <- lapply(data[categorical_cols], factor)

# Logistic regression of Engaged on all numeric and categorical columns now
# held in continuous. The result overwrites the previous logit.fit.
logit.fit <- glm(Engaged ~ ., data = continuous, family = binomial)
summary(logit.fit)