1. Athena, Redash を叩くときによく使う処理まとめる
2. 目次
3. Redash
3.1. マルチフィルター
Redash
select
...
column_name as "column_name::multi-filter"
...
from
...
3.2. ダッシュボードから絞る
Redash
select
...
, case
when '{{ status }}' = 'A' then column_a
when '{{ status }}' = 'B' then column_b
else -1
end as column_name
from
...
where
installed_date >= '{{ date_range.start }}'
AND installed_date <= '{{ date_range.end }}'
AND case
when 'ALL' in ({{ サイズ名 }}) then true
else c.size in ({{ サイズ名 }})
end
4. Athena
4.1. 文字列 → 日付
Athena
DATE_PARSE(ym,'%Y%m')
DATE_PARSE(a_date,'%Y-%m-%d %H:%i:%s')
4.2. 日付の計算
Athena
date_add('hour', 9, cast('2022/01/27 16:21:50' as timestamp)) a_date
4.3. timestamp → 文字列
Athena
date_format(cast(a_date as timestamp)), '%Y-%m-%d %H:%i:%s')
4.4. 今月の月初、翌月の月初
Athena
DATE_PARSE(cast(yow(current_date) as varchar) || substr('0' || cast(month(current_date) as varchar), -2, 3) || '01', '%Y%m%d')
, DATE_PARSE(cast(yow(current_date) as varchar) || substr('0' || cast(month(current_date + interval '1' month) as varchar), -2, 3) || '01', '%Y%m%d')
4.5. 文字列のスライス
Athena
substr('string', 1, 3) -- index=1から始まる. str
4.6. zero パディング
Athena
substr('00' || column_name, -2, 2)
4.7. 型変換
Athena
cast(column_name as double) as column_name
4.8. 文字列結合
Athena
'2021-11-01' || ' ~ ' || '2021-11-30'
4.9. 条件分岐
Athena
where
case
when 'ALL' in ({{ country }}) then True
else country in ({{ country }})
end
4.10. 重複レコードから最新の値を取得する
Athena
select
*
from
a_table as a
where not exists (
select
*
from
a_table b
where
a.name = b.name
and a.age = b.age
and a.ymd < b.ymd
)
4.11. 累積和(前行との累積)
Athena
select
a
, sum(num_value) OVER (partition by a_date ORDER BY a_col ASC ROWS between unbounded preceding and current row) as cum
from
a_table
4.12. join
- outer join さえわかれば、inner join, left join, right join はわかるはず
4.13. テーブルA, テーブルBの和集合を取りたい
select
item_name
, color
, max(A_zaiko)
, max(B_zaiko)
from (
select
item_name
, color
, zaiko as A_zaiko
, 0 as B_zaiko
from
A_table
union all
select
name
, color
, 0 as A_zaiko
, zaiko as B_zaiko
from
B_table
)
group by
name
, color
4.14. 複数カラムの重複レコードを調べたい
select
*
from (
select
name
, color
, count(*)
from
soko
group by
name
, color
)
where
count > 1