前段
JSONデータの某サービスの検証データをSnowflakeのTESTテーブルに取り込みます。
JSONなので1カラムのVARIANT型のデータになります。
fee_detailsという項目のデータはネストしています。
このネスト項目を扱うためにFLATTEN関数というのを初めて使いました。
理解のためにやってみたことを残します。
1.TEST_JSONテーブルを作成
create or replace TEST_JSON (
JSON_DATA VARIANT
);
2.サンプルデータを3件INSERT
以下の3データを作ります。
↓サンプルデータ1:fee_detailsのデータが0行
INSERT INTO TEST_JSON
SELECT
TO_VARIANT(PARSE_JSON('{"id":"txn_000000000000000000000000","object":"balance_transaction","amount":-120011,"available_on":1697760000,"created":1697652077,"currency":"jpy","description":"PAYOUT","exchange_rate":null,"fee":0,"fee_details":[],"net":-120011,"reporting_category":"payout","source":"po_000000000000000000000000","status":"available","type":"payout"}'));
↓サンプルデータ2:fee_detailsのデータが1行
INSERT INTO TEST_JSON
SELECT
TO_VARIANT(PARSE_JSON('{"id":"txn_111111111111111111111111","object":"balance_transaction","amount":119460,"available_on":1698883200,"created":1698307850,"currency":"jpy","description":"Payment for Invoice","exchange_rate":null,"fee":4301,"fee_details":[{"amount":4301,"application":null,"currency":"jpy","description":"Processing fees","type":"system_fee"}],"net":115159,"reporting_category":"charge","source":"ch_111111111111111111111111","status":"available","type":"charge"}'));
↓サンプルデータ3:fee_detailsのデータが2行
INSERT INTO TEST_JSON
SELECT
TO_VARIANT(PARSE_JSON('{"id":"txn_222222222222222222222222","object":"balance_transaction","amount":383020,"available_on":1698710400,"created":1698140172,"currency":"jpy","description":"Payment for Invoice","exchange_rate":null,"fee":6320,"fee_details":[{"amount":5745,"application":null,"currency":"jpy","description":"Processing fees","type":"system_fee"},{"amount":575,"application":null,"currency":"jpy","description":"JCT","type":"tax"}],"net":376700,"reporting_category":"charge","source":"py_222222222222222222222222","status":"available","type":"payment"}'));
INSERTに何度か失敗して、PARSE_JSONしたうえでさらにTO_VARIANTする技を発見してINSERTできました。
3.View1、View2、View3を作成
Create ViewのSQLは場所を取るので折り畳み状態にしておきます。
View1:JSONデータを単純にViewにした
CREATE VIEW VIEW1(
AMOUNT,AVAILABLE_ON,CREATED,CURRENCY,DESCRIPTION,EXCHANGE_RATE,FEE,FEE_DETAILS,
ID,NET,OBJECT,REPORTING_CATEGORY,SOURCE,STATUS,TYPE)
AS
SELECT
t1.JSON_DATA:amount as amount,
t1.JSON_DATA:available_on as available_on,
t1.JSON_DATA:created as created,
REPLACE(t1.JSON_DATA:currency, '"') as currency,
REPLACE(t1.JSON_DATA:description, '"') as description,
t1.JSON_DATA:exchange_rate as exchange_rate,
t1.JSON_DATA:fee as fee,
t1.JSON_DATA:fee_details as fee_details,--JSONのまま
REPLACE(t1.JSON_DATA:id, '"') as id,
t1.JSON_DATA:net as net,
REPLACE(t1.JSON_DATA:object, '"') as object,
REPLACE(t1.JSON_DATA:reporting_category, '"') as reporting_category,
REPLACE(t1.JSON_DATA:source, '"') as source,
REPLACE(t1.JSON_DATA:status, '"') as status,
REPLACE(t1.JSON_DATA:type, '"') as type
FROM
TEST_JSON t1
;
View2:JSONデータをネスト部分をFLATTEN関数をかました
CREATE VIEW VIEW2(
AMOUNT,AVAILABLE_ON,CREATED,CURRENCY,DESCRIPTION,EXCHANGE_RATE,FEE,FEE_DETAILS_AMOUNT,
FEE_DETAILS_APPLICATION,FEE_DETAILS_CURRENCY,FEE_DETAILS_DESCRIPTION,FEE_DETAILS_TYPE,ID,NET,
OBJECT,REPORTING_CATEGORY,SOURCE,STATUS,TYPE)
AS
SELECT
t1.JSON_DATA:amount as amount,
t1.JSON_DATA:available_on as available_on,
t1.JSON_DATA:created as created,
REPLACE(t1.JSON_DATA:currency, '"') as currency,
REPLACE(t1.JSON_DATA:description, '"') as description,
t1.JSON_DATA:exchange_rate as exchange_rate,
t1.JSON_DATA:fee as fee,
f1.value:amount as fee_details_amount,
f1.value:application as fee_details_application,
REPLACE(f1.value:currency, '"') as fee_details_currency,
REPLACE(f1.value:description, '"') as fee_details_description,
REPLACE(f1.value:type, '"') as fee_details_type,
REPLACE(t1.JSON_DATA:id, '"') as id,
t1.JSON_DATA:net as net,
REPLACE(t1.JSON_DATA:object, '"') as object,
REPLACE(t1.JSON_DATA:reporting_category, '"') as reporting_category,
REPLACE(t1.JSON_DATA:source, '"') as source,
REPLACE(t1.JSON_DATA:status, '"') as status,
REPLACE(t1.JSON_DATA:type, '"') as type
FROM
TEST_JSON t1,
lateral flatten(t1.JSON_DATA:fee_details) f1
;
View3:JSONデータをネスト部分をFLATTEN関数をかまし、さらにオプションのouter=>trueをつけた
CREATE VIEW VIEW3(
AMOUNT,AVAILABLE_ON,CREATED,CURRENCY,DESCRIPTION,EXCHANGE_RATE,FEE,FEE_DETAILS_AMOUNT,
FEE_DETAILS_APPLICATION,FEE_DETAILS_CURRENCY,FEE_DETAILS_DESCRIPTION,FEE_DETAILS_TYPE,ID,NET,
OBJECT,REPORTING_CATEGORY,SOURCE,STATUS,TYPE)
AS
SELECT
t1.JSON_DATA:amount as amount,
t1.JSON_DATA:available_on as available_on,
t1.JSON_DATA:created as created,
REPLACE(t1.JSON_DATA:currency, '"') as currency,
REPLACE(t1.JSON_DATA:description, '"') as description,
t1.JSON_DATA:exchange_rate as exchange_rate,
t1.JSON_DATA:fee as fee,
f1.value:amount as fee_details_amount,
f1.value:application as fee_details_application,
REPLACE(f1.value:currency, '"') as fee_details_currency,
REPLACE(f1.value:description, '"') as fee_details_description,
REPLACE(f1.value:type, '"') as fee_details_type,
REPLACE(t1.JSON_DATA:id, '"') as id,
t1.JSON_DATA:net as net,
REPLACE(t1.JSON_DATA:object, '"') as object,
REPLACE(t1.JSON_DATA:reporting_category, '"') as reporting_category,
REPLACE(t1.JSON_DATA:source, '"') as source,
REPLACE(t1.JSON_DATA:status, '"') as status,
REPLACE(t1.JSON_DATA:type, '"') as type
FROM
TEST_JSON t1,
lateral flatten(t1.JSON_DATA:fee_details, outer=>true) f1
;
結果
- TEST_JSONテーブルのレコード数・・・3(サンプルデータ1、サンプルデータ2、サンプルデータ3が各1件)
- View1のレコード数・・・3(元のTEST_JSONテーブルと同じ)
- View2のレコード数・・・3(サンプルデータ1が0件、サンプルデータ2が1件、サンプルデータ3が2件)
- View3のレコード数・・・4(サンプルデータ1が1件、サンプルデータ2が1件、サンプルデータ3が2件)
※View2の件数は元データ(TEST_JSONテーブル)と同じ3件だが、これは偶然の一致というか、結果的に一致しただけ。
わかったこと
- 自分の使い方ではouter=>trueのオプションは必須であり、これを付けないケースは思い当たらない。(今回の例だとfee_detailsデータがない場合は、データそのものがなかったことになってしまう)
- View定義のflattenの部分をよーく眺めていると、これは単なるJOINだということに気づいた。オプションなしだと内部結合、outer=>trueのオプションをつけると外部結合。これで完全に理解した!
p.s.
この投稿をするにあたりFLATTENでQiitaを検索したら、RubyやPythonでもFLATTENってあるんですね。知らんかった。。。
Snowflakeで初めて使ったのでSnowflakeの独自関数かと思ってました。