SQL
DB

階層構造(入れ子集合モデル)について

概要

階層構造を持つDBで入れ子集合モデルについて調べた内容をメモする。

RDBの階層構造

いろいろなパターンが存在する。

隣接リスト

経路列挙型

入れ子区間モデル

入れ子集合モデル

などなど
とりあえず、隣接と経路はなんとなくやったことがあるので入れ子集合についてさらに調べてみる。

とりあえずやってみる

下記サイトを参考に試してみる。
2章 Naive Trees(素朴な木)

テーブルとデータ作成

CREATE TABLE staffs(
    id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(32) NOT NULL,
    lft INTEGER NOT NULL,
    rgt INTEGER NOT NULL,
    CHECK (lft < rgt)
);
INSERT INTO staffs (name, lft, rgt) VALUES ('足立',  1, 14);
INSERT INTO staffs (name, lft, rgt) VALUES ('猪狩',  2,  3);
INSERT INTO staffs (name, lft, rgt) VALUES ('上田',  4, 13);
INSERT INTO staffs (name, lft, rgt) VALUES ('江崎',  5,  8);
INSERT INTO staffs (name, lft, rgt) VALUES ('木島',  6,  7);
INSERT INTO staffs (name, lft, rgt) VALUES ('大神',  9,  10);
INSERT INTO staffs (name, lft, rgt) VALUES ('加藤', 11,  12);

INSERT INTO staffs (name, lft, rgt) VALUES ('足立',  1,  4);
INSERT INTO staffs (name, lft, rgt) VALUES ('猪狩',  2,  3);
INSERT INTO staffs (name, lft, rgt) VALUES ('上田',  5, 14);
INSERT INTO staffs (name, lft, rgt) VALUES ('江崎',  6,  9);
INSERT INTO staffs (name, lft, rgt) VALUES ('木島',  7,  8);
INSERT INTO staffs (name, lft, rgt) VALUES ('大神', 10,  11);
INSERT INTO staffs (name, lft, rgt) VALUES ('加藤', 12,  13);

最下層の一覧

SELECT
    id, name, lft, rgt
FROM
    staffs Boss
WHERE
    NOT EXISTS (
        SELECT
            id
        FROM
            staffs Workers
        WHERE
            Workers.lft > Boss.lft 
            AND Workers.lft < Boss.rgt
    )
;

最上部の一覧

SELECT 
    id, name, lft, rgt
FROM
    staffs Workers
WHERE
    NOT EXISTS (
        SELECT
            id
        FROM
            staffs Boss
        WHERE
            Workers.lft > Boss.lft 
            AND Workers.lft < Boss.rgt
    )
;

ノードの深さを計算

SELECT
    Workers.id,
    Workers.name,
    COUNT(Boss.id) AS level
FROM 
    staffs Boss,
    staffs Workers
WHERE
    Workers.lft BETWEEN Boss.lft AND Boss.rgt
GROUP BY
    Workers.id,
    Workers.name
;

ノードの最大深さを計算する

SELECT
    MAX(level) AS height
FROM (
    SELECT
        Workers.name,
        COUNT(Boss.id) AS level
    FROM 
        staffs Boss,
        staffs Workers
    WHERE
        Workers.lft BETWEEN Boss.lft AND Boss.rgt
    GROUP BY
        Workers.id,
        Workers.name
) tmp;

階層をインデントで表現する

SELECT
    Mgrs.id,
    Mgrs.name,
    LPAD (
       Mgrs.name,
       LENGTH(Mgrs.name) + COUNT(Mgrs.id) + 1,
       ' '
    ) AS label
FROM
    staffs Mgrs,
    staffs MidMgrs,
    staffs Workers
 WHERE
   Mgrs.lft BETWEEN MidMgrs.lft AND MidMgrs.rgt
   AND MidMgrs.lft BETWEEN Workers.lft AND Workers.rgt
GROUP BY
   Mgrs.id, Mgrs.name, Mgrs.lft
ORDER BY
    MAX(Mgrs.lft)
;

親から見た場合の子供

SELECT
    Boss.id AS boss_id,
    Boss.name AS boss,
    Worker.name AS worker 
FROM
    staffs Boss
LEFT OUTER JOIN staffs Worker
    ON Boss.lft = (
        SELECT
            MAX(lft)
        FROM
            staffs
        WHERE
            Worker.lft > lft
            AND Worker.lft < rgt
    )
;

子から見た場合の親

SELECT
    Boss.name AS boss,
    Worker.name AS worker 
FROM
    staffs Worker
LEFT OUTER JOIN staffs Boss
    ON Boss.lft < Worker.lft
       AND Boss.lft = (
           SELECT 
               MAX(lft)
           FROM
               staffs
           WHERE
                Worker.lft > lft
                AND Worker.lft < rgt
       )
;

親の持つ子供の数

SELECT
    Boss.id AS boss_id,
    Boss.name AS boss,
    COUNT(Worker.id) AS cnt
FROM 
    staffs Boss
LEFT OUTER JOIN staffs Worker
    ON Boss.lft = (
        SELECT
            MAX(lft)
        FROM
            staffs 
        WHERE
            Worker.lft > lft
            AND Worker.lft < rgt
    )
GROUP BY
    Boss.id,
    Boss.name
;

部分木を求める

SELECT
    Boss.name AS boss,
    Workers.name AS worker
FROM 
    staffs Boss, 
    staffs Workers
WHERE
    Workers.lft > Boss.lft 
    AND Workers.lft < Boss.rgt
;

パスを列挙する(列持ちバージョン)

SELECT
    t0.name,
    CASE 
        WHEN t1.name IS NULL AND t2.name IS NULL AND t3.name IS NULL THEN t0.name
        WHEN t2.name IS NULL AND t3.name IS NULL THEN CONCAT(t1.name, '/', t0.name)
        WHEN t3.name IS NULL THEN CONCAT(t2.name, '/', t1.name, '/', t0.name)
        ELSE CONCAT(t3.name, '/', t2.name, '/', t1.name, '/', t0.name)
        END AS path
FROM 
    staffs t0
LEFT OUTER JOIN staffs t1
    ON t1.lft = (
        SELECT 
            MAX(lft)
        FROM 
            staffs
        WHERE 
            t0.lft > lft 
            AND t0.lft < rgt
    )
LEFT OUTER JOIN staffs t2
    ON t2.lft = (
        SELECT 
            MAX(lft)
        FROM 
            staffs
        WHERE 
            t1.lft > lft 
            AND t1.lft < rgt
    )
LEFT OUTER JOIN staffs t3
    ON t3.lft = (
        SELECT 
            MAX(lft)
        FROM 
            staffs
        WHERE 
            t2.lft > lft 
            AND t2.lft < rgt
    )
;

ノード間のパスを検索する (単調下降の場合はOK)

SELECT 
    O2.name,
    (O2.rgt - O2.lft) AS path_order
FROM 
    staffs O1, 
    staffs O2, 
    staffs O3
WHERE 
    O1.id = 1
    AND O3.id = 5
    AND O2.lft BETWEEN O1.lft AND O1.rgt
    AND O3.lft BETWEEN O2.lft AND O2.rgt
;

ノード同士の関係性を表示する

SELECT 
    CASE 
       WHEN O1.id = O2.id THEN CONCAT(O1.id, ' = ', O2.id)
       WHEN O1.lft BETWEEN O2.lft AND O2.rgt THEN CONCAT(O1.id, ' は ', O2.id, ' の部下')
       WHEN O2.lft BETWEEN O1.lft AND O1.rgt THEN CONCAT(O2.id, ' は ', O1.id, ' の部下')
       ELSE CONCAT(O1.id, ' は ', O2.id, ' と無関係')
       END AS label
FROM
    staffs O1,
    staffs O2
WHERE 
    O1.id = 6
    AND O2.id = 6
;

左端座標と右端座標の和集合

CREATE VIEW LftRgt(seq)
AS
SELECT lft FROM OrgChart
UNION ALL
SELECT rgt FROM OrgChart;

歯抜けチェック

SELECT
    CASE 
        WHEN COUNT(*) <> MAX(seq) - MIN(seq) + 1 
        THEN '歯抜けあり'
        ELSE '連続'
        END AS gap
FROM 
    LftRgt
;

特定のグループを削除(所属するメンバーも一緒に削除)

DELETE FROM staffs 
WHERE 
    lft BETWEEN (
        SELECT lft FROM (
            SELECT lft FROM staffs WHERE id = '4'
        ) AS tmp1
    ) AND (
        SELECT rgt FROM (
            SELECT rgt FROM staffs WHERE id = '4'
        ) AS tmp2
    )
;

MySQLでERROR:1093を回避する

ちょっと使ってみて・・・

入れ子集合モデルを使わなくて良くなったので、とりあえずここまでで終了
(時間のある時にまた触ってみたいと思う...)

使ってみての疑問

  • 大量のデータ(マスタ)を流し込む時、lft,rgtカラムに設定する値をどうやって算出するのかなど

以上