滅多に使わないが故にすぐ忘れてしまうこと必至の処理だったので、メモっておく。
session_idごとに、cvが1になった行を区切りとして、その次の行から新しい番号に切り替わるような区切り用のid（cv_id）を振りたい。
# library -----------------------------------------------------------------
require(random)
require(dplyr)
# making data -------------------------------------------------------------
# Build a small example data set: N sessions, each repeated a random number
# of times, with a within-session row counter and a binary `cv` flag.
set.seed(71)
N <- 5
# NOTE(review): random::randomStrings() appears to fetch its strings from the
# random.org service, so the ids would not be governed by set.seed() and the
# call would need network access -- confirm against the package documentation.
# as.vector() flattens the returned object into a plain vector of ids.
session_id <- as.vector(
  random::randomStrings(n = N, len = 8, digits = TRUE, unique = TRUE)
)
# Number of rows per session: floor of a uniform draw on [3, 10).
times <- floor(runif(N, min = 3, max = 10))
dat <- data.frame(
  session_id = rep(session_id, times),
  # sequence() concatenates 1:times[1], 1:times[2], ... and always returns a
  # flat vector. The previous unlist(sapply(...)) form returned a matrix
  # whenever every session happened to have the same length, which made
  # data.frame() fail.
  times = sequence(times)
)
# Flag each row with probability 0.2; these 1s are the split points used in
# the preprocessing step.
dat[["cv"]] <- rbinom(n = nrow(dat), size = 1, prob = 0.2)
# preprocess --------------------------------------------------------------
# Within each session, give every row the number of cv == 1 rows that occur
# strictly before it. A row with cv == 1 therefore still belongs to the
# current segment, and the following row starts the next cv_id.
# This relies on the rows already being in the intended order within each
# session_id, because both cumsum() and lag() follow row order.
dat_pp <- dat %>%
  dplyr::group_by(session_id) %>%
  dplyr::mutate(
    # Running count of cv == 1 rows, including the current row.
    cv_cumsum = cumsum(cv),
    # Shift by one row so the current row's own cv is excluded; the first row
    # of each session has nothing before it and gets 0.
    cv_id = dplyr::lag(cv_cumsum, n = 1, default = 0)
  ) %>%
  # Drop the grouping so later verbs on dat_pp are not silently applied per
  # session.
  dplyr::ungroup() %>%
  dplyr::select(session_id, cv_id, times, cv)
# check
tibble::glimpse(dat_pp)
結果は以下の通り
session_id cv_id times cv
1 H5bnOPEl 0 1 1
2 H5bnOPEl 1 2 0
3 H5bnOPEl 1 3 1
4 H5bnOPEl 2 4 0
5 H5bnOPEl 2 5 0
6 pXbymzES 0 1 0
7 pXbymzES 0 2 0
8 pXbymzES 0 3 0
9 pXbymzES 0 4 0
10 pXbymzES 0 5 1
11 pXbymzES 1 6 0
12 G60tnnDx 0 1 0
13 G60tnnDx 0 2 1
14 G60tnnDx 1 3 1
15 G60tnnDx 2 4 1
16 G60tnnDx 3 5 0
17 FokLEvUI 0 1 0
18 FokLEvUI 0 2 0
19 FokLEvUI 0 3 0
20 FokLEvUI 0 4 0
21 O0jUTbyT 0 1 0
22 O0jUTbyT 0 2 0
23 O0jUTbyT 0 3 0
24 O0jUTbyT 0 4 0
25 O0jUTbyT 0 5 1