まえがき
いろんな集合演算しようとおもいます。
元データ
有名なsubpartsとpartsを題材に取り組みます。
init.sqlを実行します。
select文の直前なら、with句どこでもかけるっぽいです。
merge文はinsert文だけ実行しています。(試してみたかっただけ。。)
そのあとさりげなくdelete文も書けるっぽいです。(試してみたかっただけ。。)
さらにそのあと、別テーブルに対する操作もいけます。
--SET SERVEROUTPUT ON
DROP TABLE subparts;
DROP TABLE parts;
CREATE TABLE subparts (
sno CHAR(2 BYTE)
, pno CHAR(3 BYTE)
, PRIMARY KEY ( sno, pno )
);
CREATE TABLE parts (
pno CHAR(3 BYTE)
, PRIMARY KEY ( pno )
);
BEGIN
FOR I IN 97..122 LOOP
--ここからsubpartsテーブルに対する操作
MERGE INTO subparts tar
USING(
WITH rec (
pno
)AS(
SELECT
1
FROM
dual
UNION ALL
SELECT
pno + 1
FROM
rec
WHERE
pno + 1 <= 5
)
SELECT
CHR(I) || CHR(I) AS sno
,lpad(rec.pno,3,0) AS pno
FROM
dual
CROSS JOIN rec
) src
ON
(
tar.sno = src.sno
AND tar.pno = src.pno
)
WHEN NOT MATCHED THEN
INSERT VALUES
(
src.sno
,src.pno
)
;
DELETE FROM subparts tar
WHERE
CASE
WHEN MOD(I,2) = 0 THEN CHR(I) || CHR(I)
END = tar.sno
AND tar.pno = '001'
;
DELETE FROM subparts tar
WHERE
CASE
WHEN MOD(I,3) = 0 THEN CHR(I) || CHR(I)
END = tar.sno
AND tar.pno = '002'
;
DELETE FROM subparts tar
WHERE
CASE
WHEN MOD(I,5) = 0 THEN CHR(I) || CHR(I)
END = tar.sno
AND tar.pno = '003'
;
DELETE FROM subparts tar
WHERE
CASE
WHEN MOD(I,7) = 0 THEN CHR(I) || CHR(I)
END = tar.sno
AND tar.pno = '004'
;
DELETE FROM subparts tar
WHERE
CASE
WHEN MOD(I,11) = 0 THEN CHR(I) || CHR(I)
END = tar.sno
AND tar.pno = '005'
;
--ここからpartsテーブルに対する操作
MERGE INTO parts tar
USING(
WITH rec (
pno
)AS(
SELECT
1
FROM
dual
UNION ALL
SELECT
pno + 1
FROM
rec
WHERE
pno + 1 <= 26
)
SELECT
lpad(rec.pno,3,0) AS pno
FROM
dual
CROSS JOIN rec
) src
ON
(
tar.pno = src.pno
)
WHEN NOT MATCHED THEN
INSERT VALUES
(
src.pno
)
;
DELETE
FROM
parts
WHERE
pno >= 6
;
END LOOP
;
END
;
--commit;
【init.sql実行結果】
作成されたデータに対していろいろ調べてみたいと思います。
SELECT
CASE--削除対象の数字には0を返却、それ以外は1を返却
WHEN 0 IN (MOD(ASCII(s1.sno),2),MOD(ASCII(s1.sno),3),MOD(ASCII(s1.sno),5),MOD(ASCII(s1.sno),7),MOD(ASCII(s1.sno),11)) THEN 0
ELSE 1
END AS flg
,CASE
WHEN NOT EXISTS (--partsから各snoのpnoを減算し、引き残しがなければ、1を返却、それ以外は0を返却
SELECT
s4.pno
FROM
parts s4
MINUS
SELECT
s5.pno
FROM
subparts s5
WHERE
s5.sno = s1.sno
) THEN 1
ELSE 0
END AS comp_jdg
,CASE--各snoがpartsに存在するpnoを完全に一致する場合、1を返却、それ以外は0を返却
WHEN
NOT EXISTS (--partsから各snoのpnoを減算し、引き残しがなければ、trueを返却、それ以外はfalseを返却
SELECT
s6.pno
FROM
parts s6
MINUS
SELECT
s7.pno
FROM
subparts s7
WHERE
s7.sno = s1.sno
)
AND NOT EXISTS (--各snoのpnoからpartsのpnoを減算し、引き残しがなければ、trueを返却、それ以外はfalseを返却
SELECT
s8.pno
FROM
subparts s8
WHERE
s8.sno = s1.sno
MINUS
SELECT
s9.pno
FROM
parts s9
) THEN 1
ELSE 0
END AS comp_jdg_ana
,(
SELECT
LISTAGG(sub.pno, ',') WITHIN GROUP (ORDER BY sub.pno)
FROM
(--partsから各snoのpnoを減算し、引き残したものをカンマ区切りでキュッと集約
SELECT
s2.pno AS pno
FROM
parts s2
MINUS
SELECT
s3.pno AS pno
FROM
subparts s3
WHERE
s3.sno = s1.sno
) sub
) AS miss_pno
,MOD(ASCII(s1.sno),2)
,MOD(ASCII(s1.sno),3)
,MOD(ASCII(s1.sno),5)
,MOD(ASCII(s1.sno),7)
,MOD(ASCII(s1.sno),11)
,ASCII(s1.sno)
,s1.*
FROM
subparts s1
;
【ana.sqlの取得結果(一部抜粋)】
subpartsで同じ種類の部品を持つ同業者
本題に入る前に本題に首突っ込んだ感じはありますが、気にせず、
思いつくもの書いていこうと思います。
WITH cnt_by_parts AS (
SELECT
sno
,COUNT(PNO) AS cnt
FROM
subparts
GROUP BY
sno
)
,cnt_by_sno AS (
SELECT
cnt
,LISTAGG(sno,',') WITHIN GROUP (ORDER BY sno) AS cmb
FROM
cnt_by_parts
GROUP BY
cnt
)
SELECT
*
FROM
cnt_by_sno
ORDER BY
cnt DESC
;
【non_perf.sqlの取得結果】
各供給業者の部品ひとつひとつの一致は気にせず、
同じ部品数を保持しているという観点から見た供給業者の組み合わせは
以下のようになります。
各供給業者の部品ひとつひとつの一致まで気にしていこうと思います。
SELECT
s1.sno AS s1
,s2.sno AS s2
,SUM(
CASE
WHEN s1.pno = s2.pno THEN 1
ELSE 0
END
) AS perf
,(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s1.sno = s3.sno
) AS self_cnt
,(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s2.sno = s3.sno
) AS other_cnt
FROM
subparts s1
LEFT OUTER JOIN subparts s2
ON
s1.sno < s2.sno
AND s1.pno = s2.pno
WHERE
s2.pno IS NOT NULL
GROUP BY
s1.sno
,s2.sno
HAVING
SUM(
CASE
WHEN s1.pno = s2.pno THEN 1
ELSE 0
END
) = ALL
(
(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s1.sno = s3.sno
)
-- ,(
-- SELECT
-- COUNT(DISTINCT s4.pno)
-- FROM
-- subparts s4
-- WHERE
-- s2.sno = s4.sno
-- )
)
ORDER BY
s1.sno
,s2.sno
;
【non_perf_tmp.sqlの取得結果】
相方と自身の部品が一致した件数と
相方が供給している部品数と
自身が供給している部品数の3つが一致していないものは
互いに過不足があるものです。
過不足がないものについて抽出するために、
コメントアウトしていた箇所をコメントインします。
まず、1つめ。
SELECT
s1.sno AS s1
,s2.sno AS s2
,SUM(
CASE
WHEN s1.pno = s2.pno THEN 1
ELSE 0
END
) AS perf
,(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s1.sno = s3.sno
) AS self_cnt
,(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s2.sno = s3.sno
) AS other_cnt
FROM
subparts s1
LEFT OUTER JOIN subparts s2
ON
s1.sno < s2.sno
AND s1.pno = s2.pno
WHERE
s2.pno IS NOT NULL
GROUP BY
s1.sno
,s2.sno
HAVING
SUM(
CASE
WHEN s1.pno = s2.pno THEN 1
ELSE 0
END
) = ALL
(
(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s1.sno = s3.sno
)
,(
SELECT
COUNT(DISTINCT s4.pno)
FROM
subparts s4
WHERE
s2.sno = s4.sno
)
)
ORDER BY
s1.sno
,s2.sno
;
【method_1.sqlの取得結果】
この調子で次々いこうと思います。2つめ。
SELECT
s1.sno AS s1
,s2.sno AS s2
,COUNT(s1.PNO) AS s1cnt
,COUNT(s2.PNO) AS s2cnt
FROM
subparts s1
LEFT OUTER JOIN subparts s2
ON
s1.sno < s2.sno--自身以外の供給業者のなかで
AND s1.PNO = s2.PNO--自身と同じ部品をもち
WHERE
s2.sno IS NOT NULL--自身より大きい文字コードをもつ供給業者が存在しないのレコードは除く
GROUP BY
s1.sno
,s2.sno
HAVING
COUNT(DISTINCT s1.PNO) = (--自身の部品の種類数と同じ数だけ、相方の部品の種類数も存在している
SELECT
COUNT(DISTINCT s3.PNO)
FROM
subparts s3
WHERE
s2.sno = s3.sno--自身の相方に含まれる部品の種類数をカウント
)
AND COUNT(DISTINCT s2.PNO) = (
SELECT
COUNT(DISTINCT s4.PNO)
FROM
subparts s4
WHERE
s1.sno = s4.sno--相方から見ても自身に含まれる部品の種類数をカウント
)
ORDER BY
s1.sno
,s2.sno
;
【mehod_2.sqlの取得結果(一部抜粋)】
この調子で次々いこうと思います。3つめ。
--cross join しているので、countはdistinctを付与
SELECT
s1.sno AS s1
,s2.sno AS s2
,count(distinct s1.pno) as p1
,count(distinct s2.pno) as p2
FROM
subparts s1
CROSS JOIN subparts s2
WHERE
s1.sno < s2.sno--自身よりも大きい文字コードをもつ供給業者
AND NOT EXISTS (
SELECT
1
FROM
subparts s3
WHERE
s1.sno = s3.sno--自身が供給しているの部品の中に
AND s3.PNO NOT IN (--存在しない【not existsでは部品の量は推し量ることはできない】
SELECT
s4.PNO
FROM
subparts s4
WHERE
s2.sno = s4.sno--相方の供給業者に含まれる部品数
)
)
AND NOT EXISTS (
SELECT
1
FROM
subparts s3
WHERE
s2.sno = s3.sno--自身の相方が供給しているの部品の中に
AND s3.PNO NOT IN (--存在しない【not existsでは部品の量は推し量ることはできない】
SELECT
s4.PNO
FROM
subparts s4
WHERE
s1.sno = s4.sno--自身が供給している部品数
)
)
GROUP BY
s1.sno
,s2.sno
ORDER BY
s1.sno
,s2.sno
;
【mehod_3.sqlの取得結果(一部抜粋)】
この調子で次々いこうと思います。4つめ。
--cross join しているので、countはdistinctを付与
SELECT
s1.sno AS s1
,s2.sno AS s2
,count(distinct s1.pno) as p1
,count(distinct s2.pno) as p2
FROM
subparts s1
CROSS JOIN subparts s2
WHERE
s1.sno < s2.sno--自身よりも大きい文字コードをもつ供給業者
AND NOT EXISTS (--引き残しが存在しないならば、過不足なく同じ
SELECT
s3.pno
FROM
subparts s3
WHERE
s1.sno = s3.sno--自身が供給しているの部品数から
minus--減算し
SELECT
s4.PNO
FROM
subparts s4
WHERE
s2.sno = s4.sno--相方の供給業者に含まれる部品数
)
AND NOT EXISTS (--引き残しが存在しないならば、過不足なく同じ
SELECT
s3.pno
FROM
subparts s3
WHERE
s2.sno = s3.sno--自身の相方が供給しているの部品数から
minus--減算し
SELECT
s4.PNO
FROM
subparts s4
WHERE
s1.sno = s4.sno--自身が供給している部品数
)
GROUP BY
s1.sno
,s2.sno
ORDER BY
s1.sno
,s2.sno
;
【mehod_4.sqlの取得結果(一部抜粋)】
この調子で次々いこうと思います。5つめ。
--cross join しているので、countはdistinctを付与
SELECT
s1.sno AS s1
,s2.sno AS s2
,COUNT(DISTINCT s1.pno) AS s1cnt
,COUNT(DISTINCT s2.pno) AS s2cnt
FROM
subparts s1
CROSS JOIN subparts s2
WHERE
s1.sno < s2.sno--自身よりも大きい文字コードをもつ供給業者
GROUP BY
s1.sno
,s2.sno
HAVING
SUM(CASE
WHEN s1.pno = s2.pno THEN 1 ELSE 0 END
) = ALL(
(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s1.sno = s3.sno
)
,(
SELECT
COUNT(DISTINCT s4.pno)
FROM
subparts s4
WHERE
s2.sno = s4.sno
)
)
ORDER BY
s1.sno
,s2.sno
;
【mehod_5.sqlの取得結果(一部抜粋)】
最初と同じですが、取得列が違います。^^
気を取り直して、6つめ。
SELECT
s1.sno AS s1
,s2.sno AS s2
,count(s1.pno) as s1cnt
,count(s2.pno) as s2cnt
FROM
subparts s1
FULL OUTER JOIN subparts s2
ON
s1.sno <> s2.sno
AND s1.pno = s2.pno
GROUP BY
s1.sno
,s2.sno
HAVING
COUNT(DISTINCT s1.pno) = (--自身の部品の種類数と同じ数だけ、相方の部品の種類数も存在している
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s2.sno = s3.sno--自身の相方に含まれる部品の種類数をカウント
)
AND COUNT(DISTINCT s2.pno) = (
SELECT
COUNT(DISTINCT s4.pno)
FROM
subparts s4
WHERE
s1.sno = s4.sno--相方から見ても自身に含まれる部品の種類数をカウント
)
ORDER BY
s1.sno
,s2.sno
;
【mehod_6.sqlの取得結果(一部抜粋)】
求めるものとは違いますが、
ペアの順序ちがいまでもunion allしているイメージです。
grouping sets ((s1.pno,s2.pno))【group by s1.pno,s2.pnoと等価】と
grouping sets ((s2.pno,s1.pno))【group by s2.pno,s1.pnoと等価】を
union allした結果のイメージでもあるかもしれません。
そこで、method_6.sqlの結果をmethod_2.sqlから作り出してみます。
WITH groupby_s1_to_s2 AS (
SELECT
s1.sno AS s1
,s2.sno AS s2
,SUM(
CASE
WHEN s1.pno = s2.pno THEN 1
ELSE 0
END
) AS perf
,(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s1.sno = s3.sno
) AS self_cnt
,(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s2.sno = s3.sno
) AS other_cnt
FROM
subparts s1
LEFT OUTER JOIN subparts s2
ON
s1.sno < s2.sno
AND s1.pno = s2.pno
WHERE
s2.pno IS NOT NULL
GROUP BY
s1.sno
,s2.sno
HAVING
SUM(
CASE
WHEN s1.pno = s2.pno THEN 1
ELSE 0
END
) = ALL
(
(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s1.sno = s3.sno
)
,(
SELECT
COUNT(DISTINCT s4.pno)
FROM
subparts s4
WHERE
s2.sno = s4.sno
)
)
ORDER BY
s1.sno
,s2.sno
)
, groupby_s2_to_s1 AS (
SELECT
s2.sno AS s2
,s1.sno AS s1
,SUM(
CASE
WHEN s1.pno = s2.pno THEN 1
ELSE 0
END
) AS perf
,(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s1.sno = s3.sno
) AS self_cnt
,(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s2.sno = s3.sno
) AS other_cnt
FROM
subparts s1
LEFT OUTER JOIN subparts s2
ON
s1.sno < s2.sno
AND s1.pno = s2.pno
WHERE
s2.pno IS NOT NULL
GROUP BY
s2.sno
,s1.sno
HAVING
SUM(
CASE
WHEN s1.pno = s2.pno THEN 1
ELSE 0
END
) = ALL
(
(
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s1.sno = s3.sno
)
,(
SELECT
COUNT(DISTINCT s4.pno)
FROM
subparts s4
WHERE
s2.sno = s4.sno
)
)
ORDER BY
s2.sno
,s1.sno
)
SELECT
*
FROM
groupby_s1_to_s2
UNION ALL
SELECT
*
FROM
groupby_s2_to_s1
;
【method_7.sqlの取得結果】
method_6.sqlとmethod_7.sqlの取得結果が一致しているか調べます。
WITH groupby_s1_to_s2 AS (
SELECT
s1.sno AS s1
,s2.sno AS s2
,COUNT(s1.PNO) AS s1cnt
,COUNT(s2.PNO) AS s2cnt
FROM
subparts s1
LEFT OUTER JOIN subparts s2
ON
s1.sno < s2.sno--自身以外の供給業者のなかで
AND s1.PNO = s2.PNO--自身と同じ部品をもち
WHERE
s2.sno IS NOT NULL--自身より大きい文字コードをもつ供給業者が存在しないのレコードは除く
GROUP BY
s1.sno
,s2.sno
HAVING
COUNT(DISTINCT s1.PNO) = (--自身の部品の種類数と同じ数だけ、相方の部品の種類数も存在している
SELECT
COUNT(DISTINCT s3.PNO)
FROM
subparts s3
WHERE
s2.sno = s3.sno--自身の相方に含まれる部品の種類数をカウント
)
AND COUNT(DISTINCT s2.PNO) = (
SELECT
COUNT(DISTINCT s4.PNO)
FROM
subparts s4
WHERE
s1.sno = s4.sno--相方から見ても自身に含まれる部品の種類数をカウント
)
ORDER BY
s1.sno
,s2.sno
)
, groupby_s2_to_s1 AS (
SELECT
s2.sno as s2
,s1.sno as s1
,COUNT(s1.PNO) AS s1cnt
,COUNT(s2.PNO) AS s2cnt
FROM
subparts s1
LEFT OUTER JOIN subparts s2
ON
s1.sno < s2.sno--自身以外の供給業者のなかで
AND s1.PNO = s2.PNO--自身と同じ部品をもち
WHERE
s2.sno IS NOT NULL--自身より大きい文字コードをもつ供給業者が存在しないのレコードは除く
GROUP BY
s2.sno
,s1.sno
HAVING
COUNT(DISTINCT s1.PNO) = (--自身の部品の種類数と同じ数だけ、相方の部品の種類数も存在している
SELECT
COUNT(DISTINCT s3.PNO)
FROM
subparts s3
WHERE
s2.sno = s3.sno--自身の相方に含まれる部品の種類数をカウント
)
AND COUNT(DISTINCT s2.PNO) = (
SELECT
COUNT(DISTINCT s4.PNO)
FROM
subparts s4
WHERE
s1.sno = s4.sno--相方から見ても自身に含まれる部品の種類数をカウント
)
ORDER BY
s2.sno
,s1.sno
)
,method_7 as (
SELECT
*
FROM
groupby_s1_to_s2
UNION ALL
SELECT
*
FROM
groupby_s2_to_s1
)
,method_6 as (
SELECT
s1.sno AS s1
,s2.sno AS s2
,count(s1.pno) as s1cnt
,count(s2.pno) as s2cnt
FROM
subparts s1
FULL OUTER JOIN subparts s2
ON
s1.sno <> s2.sno
AND s1.pno = s2.pno
GROUP BY
s1.sno
,s2.sno
HAVING
COUNT(DISTINCT s1.pno) = (--自身の部品の種類数と同じ数だけ、相方の部品の種類数も存在している
SELECT
COUNT(DISTINCT s3.pno)
FROM
subparts s3
WHERE
s2.sno = s3.sno--自身の相方に含まれる部品の種類数をカウント
)
AND COUNT(DISTINCT s2.pno) = (
SELECT
COUNT(DISTINCT s4.pno)
FROM
subparts s4
WHERE
s1.sno = s4.sno--相方から見ても自身に含まれる部品の種類数をカウント
)
ORDER BY
s1.sno
,s2.sno
)
select
*
from
(
(
select
*
from
method_6
minus
select
*
from
method_7
)
union all
(
select
*
from
method_7
minus
select
*
from
method_6
)
) sub
;
【chk.sqlの取得結果】
あとがき
集合演算よりもmerge文のdelete文とかの方がインパクトでかかった。。
以上、ありがとうございました。