-- プレイリスト間で共通する曲数を計算
WITH playlist_tracks AS (
SELECT
playlist_id,
track_id
FROM
log_data -- プレイリストと曲のデータ
),
pairwise_common_tracks AS (
SELECT
a.playlist_id AS playlist_a,
b.playlist_id AS playlist_b,
COUNT(*) AS common_track_count -- 共通する曲の数
FROM
playlist_tracks a
JOIN
playlist_tracks b ON a.track_id = b.track_id
WHERE
a.playlist_id < b.playlist_id -- 重複を避ける
GROUP BY
a.playlist_id, b.playlist_id
)
SELECT
playlist_a,
playlist_b,
common_track_count
FROM
pairwise_common_tracks
ORDER BY
common_track_count DESC;
WITH playlist_tracks AS (
SELECT
playlist_id,
track_id,
SUM(play_count) AS total_play_count
FROM
log_data
GROUP BY
playlist_id, track_id
),
pairwise_common_tracks AS (
SELECT
a.playlist_id AS playlist_a,
b.playlist_id AS playlist_b,
COUNT(*) AS common_track_count, -- 共通する曲の数
SUM(a.total_play_count + b.total_play_count) AS total_common_play_count -- 再生頻度の合計
FROM
playlist_tracks a
JOIN
playlist_tracks b ON a.track_id = b.track_id
WHERE
a.playlist_id < b.playlist_id
GROUP BY
a.playlist_id, b.playlist_id
)
SELECT
playlist_a,
playlist_b,
common_track_count,
total_common_play_count,
-- 関係性スコアの計算: 曲の被り数と再生頻度の重み付け
common_track_count * 0.6 + total_common_play_count * 0.4 AS relationship_score
FROM
pairwise_common_tracks
ORDER BY
relationship_score DESC;
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
CSVファイルの読み込み
csv_file = "playlist_relationships.csv" # CSVファイルのパス
df = pd.read_csv(csv_file)
NetworkX グラフオブジェクトの作成
G = nx.Graph()
エッジを追加(CSVから読み込む)
for _, row in df.iterrows():
G.add_edge(
row["playlist_a"],
row["playlist_b"],
weight=row["relationship_score"]
)
ノードの位置を計算
pos = nx.spring_layout(G, seed=42)
グラフの描画設定
plt.figure(figsize=(10, 8))
nx.draw_networkx_nodes(G, pos, node_size=700, node_color="skyblue")
nx.draw_networkx_edges(G, pos, width=[d["weight"] / 10 for _, _, d in G.edges(data=True)])
nx.draw_networkx_labels(G, pos, font_size=12, font_color="black")
エッジラベル(スコア)を追加
edge_labels = nx.get_edge_attributes(G, "weight")
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=10)
グラフを表示
plt.title("Playlist Network Based on Common Artists")
plt.axis("off")
plt.show()