# standardSQL
WITH
-- 元データからアクションユーザid、アクション実行日を作成
base_table AS (
SELECT DISTINCT
user_id
, DATE(creation_date) AS action_date
, 1 AS action_flag
FROM
`bigquery-public-data.stackoverflow.comments`
WHERE
DATE(creation_date) >= '2018-10-01'
)
-- コホート作成の対象期間を作成
, calendar_table AS (
SELECT
base_date
FROM
UNNEST(GENERATE_DATE_ARRAY('2018-10-01', '2018-10-09', INTERVAL 1 DAY)) AS base_date
)
-- アクションユーザid毎に、アクション実施日で並び替えて、次回のアクション実施日を取得
, process_table1 AS (
SELECT DISTINCT
base_date
, action_date
, user_id
, action_flag
, lead(user_id,1) OVER (PARTITION BY base_date, user_id ORDER BY action_date) AS lead_id
, lead(action_date,1) OVER (PARTITION BY base_date, user_id ORDER BY action_date) AS lead_action_date
FROM
base_table
CROSS JOIN
calendar_table
WHERE
action_date >= base_date
)
-- アクションユーザid毎に、アクション実施日に対して次回のアクション実施日が連続となっているかどうかをチェック
, process_table2 AS (
SELECT
*
, CASE
WHEN lead_action_date is NULL THEN 1
WHEN user_id = lead_id AND DATE_ADD(action_date, INTERVAL 1 DAY) <> lead_action_date THEN 1
ELSE 0
END AS not_continue_date_flg
FROM
process_table1
)
-- 対象期間中、 id毎にアクションが連続日になっていない日数をカウント
, process_table3 AS (
SELECT
*
, SUM(not_continue_date_flg) OVER (PARTITION BY base_date, user_id) AS cum_not_continue_date_flg
FROM
process_table2
)
-- 対象期間中、id毎に連続日数を計算
, process_table4 AS (
SELECT
*
, SUM(action_flag) OVER (PARTITION BY base_date, user_id, cum_not_continue_date_flg) AS continue_num
FROM
process_table3
)
-- 対象日について計測期間中の初回アクションかどうか?をフラグ立て
, include_base_date_flag_table AS (
SELECT
base_date
, user_id
, CASE
WHEN base_date = min_action_date then 1
ELSE 0
END AS include_base_date_flag
FROM (
SELECT
base_date
, user_id
, min(action_date) as min_action_date
FROM
process_table4
GROUP BY
1, 2
)
)
-- 対象日からみた、連続日毎のUU
SELECT DISTINCT
base_date
, continue_num
, COUNT(DISTINCT user_id) OVER (PARTITION BY base_date, continue_num) AS user_count
, COUNT(DISTINCT user_id) OVER (PARTITION BY base_date) AS base_user_count
, COUNT(DISTINCT user_id) OVER (PARTITION BY base_date, continue_num) / COUNT(DISTINCT user_id) OVER (PARTITION BY base_date) AS continue_rate
FROM
process_table4 main
LEFT OUTER JOIN
include_base_date_flag_table target
USING(user_id, base_date)
WHERE
include_base_date_flag = 1
;