-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathRFM Analysis (Sales Data).sql
300 lines (266 loc) · 10.1 KB
/
RFM Analysis (Sales Data).sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
/*
RFM Analysis | Value Segmentation | Customer Segmentation
Skills used: Joins, Unions, CTE's, Temp Tables,Views, Windows Functions, Aggregate Functions, CASE, Converting Data Types
--==> This means insights/inferences
*/
--Lets have a look at the data
Select top 10 * FROM PortfolioProjects..['Sales Orders Data']
--Get the range of dates for the order data
Select
MAX(OrderDate) AS MAX,
MIN(OrderDate) AS MIN
FROM PortfolioProjects..['Sales Orders Data']
--==> Data is from May 2018 to Dec 2020
--Since its a bit out-dated data, so lets declare a today variable for better calculations
DECLARE @today_date AS DATE = '2021-01-31';
--Calculating the RFM
SELECT
_CustomerID AS CustomerID
,Datediff(day,MAX(OrderDate),@today_date) AS Recency
,Count(OrderNumber) AS Frequency
,Sum([Unit Price] - ([Unit Price]*[Discount Applied] - [Unit Cost])) AS Monetary_Value
FROM PortfolioProjects..['Sales Orders Data']
GROUP BY _CustomerID
---------------------------------------------------------------------------------------------------------------------------------
---Lets understand the distribution of RFM Values by Five Number Summary
--Calculate RFM Values
DECLARE @today_date AS DATE = '2021-01-01';
WITH RFM_CALC AS (
SELECT
_CustomerID AS CustomerID
,Datediff(day,MAX(OrderDate),@today_date) AS Recency
,Count(OrderNumber) AS Frequency
,CAST(Sum([Unit Price] - ([Unit Price]*[Discount Applied] - [Unit Cost])) AS decimal(16,2)) AS Monetary_Value
FROM PortfolioProjects..['Sales Orders Data']
GROUP BY _CustomerID
),
--Minimum & Maximum Values
MinMax AS (
Select
Min(Recency) AS Rmin,
Max(Recency) AS Rmax,
Min(Frequency) AS Fmin,
Max(Frequency) AS Fmax,
Min(Monetary_Value) AS Mmin,
Max(Monetary_Value) AS Mmax
FROM RFM_CALC
)
--Fivenumber Summary for Monetary Value
SELECT DISTINCT
'Monetary Value' AS RFM,
M.Mmin AS Min,
PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY Monetary_Value) OVER () as Q1,
PERCENTILE_DISC(0.50) WITHIN GROUP (ORDER BY Monetary_Value) OVER () as Median,
PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY Monetary_Value) OVER () as Q3,
M.Mmax AS Max
FROM MinMax M JOIN RFM_CALC ON 1=1
UNION
--Fivenumber Summary for Frequency
SELECT DISTINCT
'Frequency' AS RFM,
F.Fmin AS Min,
PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY Frequency) OVER () as Q1,
PERCENTILE_DISC(0.50) WITHIN GROUP (ORDER BY Frequency) OVER () as Median,
PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY Frequency) OVER () as Q3,
F.Fmax AS Max
FROM MinMax F JOIN RFM_CALC ON 1=1
UNION
--Fivenumber Summary for Recency
SELECT DISTINCT
'Recency' AS RFM,
R.Rmin AS Min,
PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY Recency) OVER () as Q1,
PERCENTILE_DISC(0.50) WITHIN GROUP (ORDER BY Recency) OVER () as Median,
PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY Recency) OVER () as Q3,
R.Rmax AS MAX
FROM MinMax R JOIN RFM_CALC ON 1=1
--==> Data is righly-skewed
---------------------------------------------------------------------------------------------------------------------------------
----lets partition RFM Values on the scale of 1 to 5 scores as the ranges of RFM are not very big
--Lets calculate RFM Values
DECLARE @today_date AS DATE = '2021-01-01';
WITH RFM_CALC AS (
SELECT
_CustomerID AS CustomerID
,Datediff(day,MAX(OrderDate), @today_date) AS Recency
,Count(OrderNumber) AS Frequency
,CAST(Sum([Unit Price] - ([Unit Price]*[Discount Applied] - [Unit Cost])) AS decimal(16,2)) AS Monetary_Value
FROM PortfolioProjects..['Sales Orders Data']
GROUP BY _CustomerID
)
-- Calculate RMF Scores
SELECT
CustomerID
,Recency
,Frequency
,Monetary_Value
,NTILE(5) OVER(ORDER BY Recency DESC) AS Recency_Score
,NTILE(5) OVER(ORDER BY Frequency ASC) AS Frequency_Score
,NTILE(5) OVER(ORDER BY Monetary_Value ASC) AS Monetary_Score
FROM
RFM_CALC
ORDER BY
CustomerID
---------------------------------------------------------------------------------------------------------------------------------
----Lets store the above result as a temporary table for further analytics
--Lets calculate RFM Values
WITH RFM_CALC AS (
SELECT
_CustomerID AS CustomerID
,Datediff(day,MAX(OrderDate),'2021-01-01') AS Recency
,Count(OrderNumber) AS Frequency
,CAST(Sum([Unit Price] - ([Unit Price]*[Discount Applied] - [Unit Cost])) AS decimal(16,2)) AS Monetary_Value
FROM PortfolioProjects..['Sales Orders Data']
GROUP BY _CustomerID
)
-- Calculate RMF Scores
SELECT
CustomerID
,Recency
,Frequency
,Monetary_Value
,NTILE(5) OVER(ORDER BY Recency DESC) AS Recency_Score
,NTILE(5) OVER(ORDER BY Frequency ASC) AS Frequency_Score
,NTILE(5) OVER(ORDER BY Monetary_Value ASC) AS Monetary_Score
INTO #RFM_Value_Score
FROM
RFM_CALC
---------------------------------------------------------------------------------------------------------------------------------
----Lets check the Ranges of RFM by Scores using the temp table created above
WITH Recency_Range AS (
Select
row_number() Over(Order by Recency_Score) AS I,
Recency_Score,
Min(Recency) AS Rmin,
Max(Recency) AS Rmax
FROM #RFM_Value_Score
GROUP BY Recency_Score
),
Frequency_Range AS (
Select
row_number() Over(Order by Frequency_Score) AS I,
Frequency_Score,
Min(Frequency) AS Fmin,
Max(Frequency) AS Fmax
FROM #RFM_Value_Score
GROUP BY Frequency_Score
),
Monetary_Range AS (
Select
row_number() Over(Order by Monetary_Score) AS I,
Monetary_Score,
Min(Monetary_Value) AS Mmin,
Max(Monetary_Value) AS Mmax
FROM #RFM_Value_Score
GROUP BY Monetary_Score
)
Select
Recency_Score,Rmin,Rmax,
Frequency_Score,Fmin,Fmax,
Monetary_Score,Mmin,Mmax
FROM Recency_Range R
Join Frequency_Range F
On R.I = F.I
Join Monetary_Range M
On R.I = M.I
---------------------------------------------------------------------------------------------------------------------------------
----Create the Value Segments & Customer Segments based on RFM Score & Average RFM Score & store as a View for further Analytics & Visualization
--As we can't use the variable directly in the View, Lets create a Function to get the Recency``1
CREATE FUNCTION GetRecency(@today_date DATE, @orderDate DATE)
RETURNS INT
AS
BEGIN
RETURN DATEDIFF(day, @orderDate, @today_date);
END;
--Drop View if exixts
DROP VIEW IF EXISTS RFM_View;
--Create a View for RFM Values & RFM Scores
CREATE VIEW RFM_View AS
--Calculate RFM Values
WITH RFM_CALC AS (
SELECT
_CustomerID AS CustomerID,
dbo.GetRecency('2021-01-01', MAX(OrderDate)) AS Recency,
COUNT(OrderNumber) AS Frequency,
CAST(SUM([Unit Price] - ([Unit Price]*[Discount Applied] - [Unit Cost])) AS decimal(16,2)) AS Monetary_Value
FROM PortfolioProjects..['Sales Orders Data']
GROUP BY _CustomerID
),
-- Calculate RMF Scores
RFM_SCORES AS (
SELECT
CustomerID
,Recency
,Frequency
,Monetary_Value
,NTILE(5) OVER(ORDER BY Recency DESC) AS Recency_Score
,NTILE(5) OVER(ORDER BY Frequency ASC) AS Frequency_Score
,NTILE(5) OVER(ORDER BY Monetary_Value ASC) AS Monetary_Score
FROM RFM_CALC
),
-- Calculate Avg RFM Score
RFM_AVG_SCORE AS (
Select
CustomerID
,CONCAT_WS('-',Recency_Score,Frequency_Score,Monetary_Score) AS R_F_M
,CAST((CAST(Recency_Score AS Float) + Frequency_Score + Monetary_Score)/3 AS DECIMAL(16,2)) AS Avg_RFM_Score
FROM RFM_SCORES
)
Select
T1.CustomerID
,Recency,Frequency,Monetary_Value
,Recency_Score,Frequency_Score,Monetary_Score
,R_F_M,Avg_RFM_Score
FROM RFM_SCORES T1
JOIN RFM_AVG_SCORE T2
ON T1.CustomerID = T2.CustomerID
SELECT TOP 10 * FROM RFM_View ORDER BY Avg_RFM_Score
--Drop View if already exists
DROP VIEW IF EXISTS Customer_Segmentaion;
----Create a View for the Customer Segments & Value Segments using the View "RFM_View"
CREATE VIEW Customer_Segmentaion AS
Select *
, CASE WHEN Avg_RFM_Score >= 4 THEN 'High Value'
WHEN Avg_RFM_Score >= 2.5 AND Avg_RFM_Score < 4 THEN 'Mid Value'
WHEN Avg_RFM_Score > 0 AND Avg_RFM_Score < 2.5 THEN 'Low Value'
END AS Value_Seg --Value Segment
, CASE WHEN Frequency_Score >= 4 and Recency_Score >= 4 and Monetary_Score >= 4 THEN 'VIP'
WHEN Frequency_Score >= 3 and Monetary_Score < 4 THEN 'Regular'
WHEN Recency_Score <= 3 and Recency_Score > 1 THEN 'Dormat'
WHEN Recency_Score = 1 THEN 'Churned'
WHEN Recency_Score >= 4 and Frequency_Score <= 4 THEN 'New Customer'
END AS Cust_Seg --Customer Segment
FROM RFM_View
SELECT TOP 10 * FROM Customer_Segmentaion ORDER BY Avg_RFM_Score
---------------------------------------------------------------------------------------------------------------------------------
--*******************************************************************************************************************************
----Insights
--Distribution of Customers by Value Segment
SELECT
Value_Seg,
COUNT(CustomerID) AS Customer_Count
FROM Customer_Segmentaion
GROUP BY Value_Seg
ORDER BY Customer_Count
--==> We have highest Mid Value Customers (42%)
--Distribution of Customers by Customer Segment
SELECT
Cust_Seg,
COUNT(CustomerID) AS Customer_Count
FROM Customer_Segmentaion
GROUP BY Cust_Seg
ORDER BY Customer_Count
--==>Company have highest Dormat Customers (34%), 20% Regular Customers, 18% New Custoers, 16% Churned Customers & Lowest VIP Customers (12%)
--Distribution of customers across different RFM customer segments within each value segment
SELECT
Value_Seg,
Cust_Seg,
COUNT(CustomerID) AS Customer_Count
FROM Customer_Segmentaion
GROUP BY Cust_Seg,Value_Seg
ORDER BY Value_Seg,Customer_Count DESC
--==>Churned Customers are equally distributed among mid value & low value customers.
--==>Dormant Customes are distributed across all the value segments, low value segment have the maximum Dormant customers.
--==>Regular Customers are also distributed across all the value segments but majorly the Mid Value segment.
--==>New Customers are als0 distributed across all the value segments but majorly low value & mid value segment.
--==>55% of High Value segment customers are the VIP Customer