1
2

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

RでRFM分析.1

Last updated at Posted at 2020-10-29

記事の目的

RでRFM分析をします。RFMデータの作成からクラスタリングまで行います。
コピペすれば全部再現できます。
次回の記事(RでRFM分析.2)では、この一連の流れの関数を作成しています。

目次

1. 使用データの作成
2. RFMデータの作成と分布、統計量の確認
3. RFMポイントの作成と分布の確認
4. クラスタリング(k-means)

#1. 使用データの作成

library(dplyr)
set.seed(100)
Data <- data.frame(matrix(NA, ncol=3, nrow=3000))
names(Data) <- c("time", "customer_id", "price")
customer_id.tmp1 <- 1:500
customer_id.tmp2 <- runif(500, 301, 500)
customer_id.tmp3 <- runif(1000, 1, 100) %>% floor()
customer_id.tmp4 <- runif(1000, 1, 300) %>% floor()
Data$customer_id <- c(customer_id.tmp1, customer_id.tmp2, customer_id.tmp3, customer_id.tmp4)
Data$price <- 500 + rbinom(3000, 100, 0.01)*(runif(3000, 0, 10000) %>% floor())
time.tmp <- seq(as.Date("2020/03/01"), as.Date("2020/03/30"), "day")
Data$time <- sample(time.tmp, 3000, replace = TRUE)
Data %>% head()
#結果
        time customer_id price
1 2020-03-21           1   500
2 2020-03-19           2  7386
3 2020-03-03           3 21353
4 2020-03-28           4   500
5 2020-03-05           5  9022
6 2020-03-08           6  2338

#2. RFMデータの作成と分布、統計量の確認

#Recency
time <- Data$time %>% unique() %>% sort()
recency <- 1:length(time)
recency.tmp <- data.frame(time, recency)
Data.rfm.tmp <- left_join(Data, recency.tmp, by="time")
#RFM
Data.rfm <- Data.rfm.tmp %>% group_by(customer_id) %>% 
  summarize(r=max(recency), f=n(), m=sum(price))
Data.rfm %>% head()
#結果
  customer_id     r     f      m
        <dbl> <int> <int>  <dbl>
1           1    29    21 128370
2           2    23    11  39309
3           3    30    20 157797
4           4    28     8  49611
5           5    30    20 107080
6           6    29    10  60323

#RFMデータの可視化と統計量
par(mfrow=c(1,3))
Data.rfm$recency %>% hist(main="recency")
Data.rfm$frequency %>% hist(main="frequency")
Data.rfm$monetary %>% hist(main="monetary")
summary(Data.rfm)
#結果
  customer_id          r               f            m         
 Min.   :  1.0   Min.   : 1.00   Min.   : 1   Min.   :   500  
 1st Qu.:250.8   1st Qu.:11.00   1st Qu.: 1   1st Qu.:   500  
 Median :362.1   Median :20.00   Median : 1   Median :  6222  
 Mean   :326.6   Mean   :18.66   Mean   : 3   Mean   : 15972  
 3rd Qu.:428.5   3rd Qu.:27.00   3rd Qu.: 3   3rd Qu.: 17465  
 Max.   :500.0   Max.   :30.00   Max.   :26   Max.   :193111  

image.png

#3. RFMポイントの作成と分布の確認

#RFMポイント設定
r.c <- c(3, 8, 15, 22)
f.c <- c(2, 5, 11, 16)
m.c <- c(501, 10000, 50000, 100000)
rfm.point <- Data.rfm %>% mutate(r.point = if_else(r<r.c[1],1,if_else(r<r.c[2],2,if_else(r<r.c[3],3,if_else(r<r.c[4],4,5)))),
                             f.point = if_else(f<f.c[1],1,if_else(f<f.c[2],2,if_else(f<f.c[3],3,if_else(f<f.c[4],4,5)))),
                             m.point = if_else(m<m.c[1],1,if_else(m<m.c[2],2,if_else(m<m.c[3],3,if_else(m<m.c[4],4,5)))))
rfm.point %>% head()
#結果
  customer_id     r     f      m r.point f.point m.point
        <dbl> <int> <int>  <dbl>   <dbl>   <dbl>   <dbl>
1           1    29    21 128370       5       5       5
2           2    23    11  39309       5       4       3
3           3    30    20 157797       5       5       5
4           4    28     8  49611       5       3       3
5           5    30    20 107080       5       5       5
6           6    29    10  60323       5       3       4
#R,F,M得点分布
r <- rfm.point %>% group_by(r.point) %>% summarise(r.point = n()) 
f <- rfm.point %>% group_by(f.point) %>% summarise(f.point = n()) 
m <- rfm.point %>% group_by(m.point) %>% summarise(m.point = n()) 
n.rfm <- data.frame(r, f, m)
n.rfm
# 結果
  r.point f.point m.point
1      35     710     264
2     112     109     371
3     191      99     275
4     216      50      68
5     446      32      22
#総合得点
point <- rfm.point$r.point+rfm.point$f.point+rfm.point$m.point
n.rfm.sum <- point %>% as.factor() %>% summary()
n.rfm.sum
# 結果
  3   4   5   6   7   8   9  10  11  12  13  14  15 
 11  56 122 178 192 141  77  54  65  33  37  18  16 

#4. クラスタリング(k-means)

#クラスター分析
library(lattice)
library(gridExtra)
km <- kmeans(rfm.point[,5:7], 3, iter.max = 30)
rfm.point$cluster <- km$cluster %>% as.factor()
p1 <- bwplot(r.point~cluster, rfm.point, horizontal = FALSE)
p2 <- bwplot(f.point~cluster, rfm.point, horizontal = FALSE)
p3 <- bwplot(m.point~cluster, rfm.point, horizontal = FALSE)
n.cluster <- rfm.point %>% group_by(cluster) %>% summarize(cout =n())
n.cluster
grid.arrange(p1, p2, p3)
# 結果
  cluster  cout
  <fct>   <int>
1 1         419
2 2         245
3 3         336

image.png

1
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
2

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?