BigQueryの配列操作の関数化のメモ書きです。
関数化の実用性はあまりないと思いますが、配列を使った演算や条件をクエリに書くときの参考に。
2つの配列から重複を除いて結合(||演算バージョン)
CREATE TEMP FUNCTION ARRAY_UNION(a1 ANY TYPE, a2 ANY TYPE)
AS (
(SELECT ARRAY_AGG(DISTINCT i ORDER BY i) FROM UNNEST(a1 || a2) AS i)
);
a = [1,2,3,4,5]
b = [3,4,5,6,7]
ARRAY_UNION(a,b) -> [1,2,3,4,5,6,7]
2つの配列の共通部分の配列(INTERSECTバージョン)
CREATE TEMP FUNCTION ARRAY_INTERSECT(a1 ANY TYPE, a2 ANY TYPE)
AS (
(SELECT ARRAY_AGG(DISTINCT i ORDER BY i) FROM (
SELECT i FROM UNNEST(a1) AS i INTERSECT DISTINCT
SELECT i FROM UNNEST(a2) AS i ) w)
);
a = [1,2,3,4,5]
b = [3,4,5,6,7]
ARRAY_INTERSECT(a,b) -> [3,4,5]
2つの配列の共通部分の配列(JOINバージョン)
CREATE TEMP FUNCTION ARRAY_INTERSECT(a1 ANY TYPE, a2 ANY TYPE)
AS (
(SELECT ARRAY_AGG(DISTINCT i ORDER BY i) FROM
UNNEST(a1) AS i JOIN UNNEST(a2) AS j ON i = j)
);
a = [1,2,3,4,5]
b = [3,4,5,6,7]
ARRAY_INTERSECT(a,b) -> [3,4,5]
2つの配列の差(a1-a2)の配列(EXCEPTバージョン)
CREATE TEMP FUNCTION ARRAY_EXCEPT(a1 ANY TYPE, a2 ANY TYPE)
AS (
(SELECT ARRAY_AGG(DISTINCT i ORDER BY i) FROM (
SELECT i FROM UNNEST(a1) AS i EXCEPT DISTINCT
SELECT i FROM UNNEST(a2) AS i ) w)
);
a = [1,2,3,4,5]
b = [3,4,5,6,7]
ARRAY_EXCEPT(a,b) -> [1,2]
2つの配列の差(a1-a2)の配列(JOINバージョン)
CREATE TEMP FUNCTION ARRAY_EXCEPT(a1 ANY TYPE, a2 ANY TYPE)
AS (
(SELECT ARRAY_AGG(DISTINCT i ORDER BY i) FROM
UNNEST(a1) AS i LEFT JOIN UNNEST(a2) AS j ON i = j
WHERE j IS NULL)
);
a = [1,2,3,4,5]
b = [3,4,5,6,7]
ARRAY_EXCEPT(a,b) -> [1,2]
包含関係:2つ目の配列が1つ目の配列の部分集合(a2⊂a1)か判定(COUNTバージョン)
CREATE TEMP FUNCTION ARRAY_SUBSET(a1 ANY TYPE, a2 ANY TYPE)
AS (
(SELECT CASE WHEN COUNT(*) = 0 THEN TRUE ELSE FALSE END
FROM UNNEST(a2) AS i
WHERE i NOT IN (SELECT i FROM UNNEST(a1) AS i ))
);
a = [1,2,3,4,5]
b = [3,4,5,6,7]
ARRAY_SUBSET(a,b) -> FALSE
a = [1,2,3,4,5]
b = [1,3,5]
ARRAY_SUBSET(a,b) -> TRUE
包含関係:2つ目の配列が1つ目の配列の部分集合(a2⊂a1)か判定(EXISTSバージョン)
CREATE TEMP FUNCTION ARRAY_SUBSET(a1 ANY TYPE, a2 ANY TYPE)
AS (
(SELECT CASE WHEN
EXISTS (SELECT 1 FROM UNNEST(a2) AS i
WHERE i NOT IN (SELECT i FROM UNNEST(a1) AS i ))
THEN FALSE ELSE TRUE END)
);
a = [1,2,3,4,5]
b = [3,4,5,6,7]
ARRAY_SUBSET(a,b) -> FALSE
a = [1,2,3,4,5]
b = [1,3,5]
ARRAY_SUBSET(a,b) -> TRUE