More than 1 year has passed since last update.

Spotifyのプレイリストの楽曲データをChatGPT等で分析して、PPTを作成するツールを作ってみた。

Posted at 2023-06-07

音楽聴くのが好きなので、Spotifyを愛用しているのですが、Spotify APIを使うと様々なデータを見れるので、自動でプレイリストの分析をして、パワーポイントを出力するツールを作ってみました。

作ったツールは、Spotifyのプレイリストの楽曲データを取得して、グラフ化して、ChatGPTがグラフの説明をするパワーポイントを作成するツールです。

今回もツールの単発使用なので、Google colabを使用します。

手順は、「Spotifyのプレイリストの楽曲データを取得して、グラフ化して、ChatGPTがグラフの説明をするパワーポイントを作成するツールを実行する」だけです。

1. Spotifyのプレイリストの楽曲データを取得して、グラフ化して、ChatGPTがグラフの説明をするパワーポイントを作成するツールを実行する

まず、日本語フォントと必要なライブラリを以下のようにインストールします。

!apt-get install -y fonts-ipaexfont-gothic
!apt-get install -y fonts-ipafont-gothic

!pip install --upgrade pip
!pip install setuptools wheel
!pip install matplotlib seaborn pandas python-pptx openai spotipy

次にSpotifyの認証のコードを実行します。
Spotify APIのクライアントIDとシークレット、リダイレクトURLを設定してから以下のコードを実行します。
実行後に生成された認証URLにアクセスして認証を行います。認証後にリダイレクトのページが開かれるので、そのページのURLをコピーして、実行したコードの下に現れる入力欄にペーストします。
※先にSpotify APIのダッシュボードで、クライアントIDとシークレットの取得と、リダイレクトURLの設定が必要です。少しUIが変わりましたが、こちらを参考にしてください。

import spotipy
from spotipy.oauth2 import SpotifyOAuth

# Set the Spotify API client ID, client secret and redirect URL
# (all three must be filled in before running this cell).
client_id = ''
client_secret = ''
redirect_uri = ''

# Scopes requested from the user
scope = 'user-read-recently-played playlist-read-private playlist-read-collaborative app-remote-control user-read-playback-state user-library-read user-modify-playback-state playlist-modify-public playlist-modify-private'

# OAuth helper used to build the authorization URL and to obtain tokens
sp_oauth = SpotifyOAuth(client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri, scope=scope)

# URL the user has to open in order to grant access
auth_url = sp_oauth.get_authorize_url()

# Reuse a cached token when one exists; otherwise run the manual
# copy-and-paste authorization flow.
token_info = sp_oauth.get_cached_token()
if not token_info:
    print(auth_url)
    redirected_url = input("Enter the URL you were redirected to: ").strip()
    # Pass the whole redirected URL to spotipy instead of slicing it by hand:
    # manual slicing on "?code=" silently produced garbage when the marker was
    # missing or when further query parameters followed the code.
    code = sp_oauth.parse_response_code(redirected_url)
    token_info = sp_oauth.get_access_token(code)

# Create the client in BOTH cases. Previously `sp` was only defined inside the
# `if` branch, so a run that found a cached token left `sp` undefined.
token = token_info['access_token']
refresh_token = token_info['refresh_token']
sp = spotipy.Spotify(auth=token)

最後にOpenAIのAPIキーを設定してから、以下のコードを実行します。
実行後に生成されたフォームには好きなSpotifyのプレイリストのURLを入れてください。

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pandas as pd
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE
from pptx.enum.text import MSO_ANCHOR
import os
import requests
from io import BytesIO
from PIL import Image
import openai
from datetime import datetime


# OpenAI API key (fill in before running)
openai.api_key = "" 


# --- Japanese font setup for matplotlib ---
# Import the submodule explicitly instead of relying on it having been loaded
# as a side effect of importing matplotlib.pyplot.
import matplotlib.font_manager

cache_dir = matplotlib.get_cachedir()

# Create the cache directory if it is missing (no separate existence check)
os.makedirs(cache_dir, exist_ok=True)

# Remove matplotlib's cached font lists
for cache_file in os.listdir(cache_dir):
    if cache_file.startswith('fontlist-v'):
        os.remove(os.path.join(cache_dir, cache_file))

# Register the IPA Gothic font file with the font manager
matplotlib.font_manager.fontManager.addfont('/usr/share/fonts/opentype/ipafont-gothic/ipag.ttf')

# Use it as the default font family for all plots
matplotlib.rcParams['font.family'] = 'IPAGothic'


# Read the playlist URL and extract the playlist ID (the last path segment).
# Surrounding whitespace, the query string and a trailing slash are removed
# first so that a pasted URL such as ".../playlist/<id>/?si=..." still works.
url = input("Enter the Playlist URL: ").strip()
playlist_id = url.split('?')[0].rstrip('/').split('/')[-1]
if not playlist_id:
    raise ValueError("Could not extract a playlist ID from the given URL.")

# Fetch the playlist metadata
playlist_info = sp.playlist(playlist_id)

# Playlist name
playlist_name = playlist_info['name']

# URL of the first playlist image
# (assumes the playlist has at least one image; indexing fails otherwise)
playlist_image_url = playlist_info["images"][0]["url"]

# Initialise the DataFrame that will hold one row per track
df = pd.DataFrame(columns=['name', 'artist', 'key', 'mode', 'energy', 'tempo'])

# トラック情報を取得する関数を定義
def get_all_tracks(playlist_id):
    """Return all items of a playlist, following pagination to the end.

    Uses the module-level Spotify client ``sp``. The first page is requested
    with ``limit=100``; further pages are fetched with ``sp.next`` for as long
    as the current page reports a ``next`` link.

    Args:
        playlist_id: ID of the playlist to read.

    Returns:
        A list with the ``items`` entries of every page, in page order.
    """
    page = sp.playlist_tracks(playlist_id, limit=100)
    collected = page['items']

    # Keep walking the result pages until there is no follow-up page.
    while page['next']:
        page = sp.next(page)
        collected += page['items']

    return collected

# Fetch every item of the playlist
all_tracks = get_all_tracks(playlist_id)

# Keep only items that carry a track object with an ID. Items without one
# would otherwise crash the feature lookup below.
valid_tracks = [
    item['track']
    for item in all_tracks
    if item.get('track') and item['track'].get('id')
]

# Request audio features in batches instead of one API call per track.
# NOTE(review): 100 IDs per request is the documented maximum of Spotify's
# audio-features endpoint - confirm against the current API reference.
rows = []
for start in range(0, len(valid_tracks), 100):
    batch = valid_tracks[start:start + 100]
    batch_features = sp.audio_features([t['id'] for t in batch]) or []
    for track, features in zip(batch, batch_features):
        # Skip tracks for which no features were returned
        if not features:
            continue
        rows.append({
            'name': track['name'],
            'artist': track['artists'][0]['name'],
            'key': features['key'],
            'mode': features['mode'],
            'energy': features['energy'],
            'tempo': features['tempo'],
        })

# Build the DataFrame once from all rows (no repeated concat inside the loop)
df = pd.DataFrame(rows, columns=['name', 'artist', 'key', 'mode', 'energy', 'tempo'])

# Key codes and the corresponding note names
key_dict = {
    0: 'C',
    1: 'C♯,D♭',
    2: 'D',
    3: 'D♯,E♭',
    4: 'E',
    5: 'F',
    6: 'F♯,G♭',
    7: 'G',
    8: 'G♯,A♭',
    9: 'A',
    10: 'A♯,B♭',
    11: 'B'
}
# Convert key codes to note names; codes missing from key_dict become NaN
df['key'] = df['key'].map(key_dict)


# Create the presentation with a 16 x 9 inch slide size
prs = Presentation()
prs.slide_width = Inches(16)
prs.slide_height = Inches(9)


# Title slide. Layout index 6 is used as an empty canvas and every shape is
# placed manually below.
slide_layout = prs.slide_layouts[6]
slide0 = prs.slides.add_slide(slide_layout)

# Download the playlist image. A timeout and an explicit status check make a
# failed download surface as an HTTP error instead of an image-decoding error.
response = requests.get(playlist_image_url, timeout=30)
response.raise_for_status()

# Save the image for python-pptx and read its size in the same step
with Image.open(BytesIO(response.content)) as img:
    img.save('playlist_image.png')
    width, height = img.size

# Scale the image to 6/9 of the slide height, keeping the aspect ratio
aspect_ratio = width / height
new_height = prs.slide_height * 6 / 9
new_width = new_height * aspect_ratio

left = prs.slide_width - new_width - Inches(0.5)  # right-aligned with a small margin
top = (prs.slide_height - new_height) / 2  # vertically centred
slide0.shapes.add_picture('playlist_image.png', left, top, new_width, new_height)

# Title text box: fills the area left of the image
left = Inches(0.5)  # small left margin
width = prs.slide_width - new_width - Inches(1)  # slide width minus image width minus margins
height = Inches(0.9)
top = (prs.slide_height - height) / 2 - Inches(1)  # one inch above the vertical centre
title_textbox = slide0.shapes.add_textbox(left, top, width, height)
title_tf = title_textbox.text_frame
title_tf.text = playlist_name
title_tf.paragraphs[0].alignment = PP_ALIGN.CENTER
title_tf.paragraphs[0].runs[0].font.bold = True
title_tf.paragraphs[0].runs[0].font.size = Pt(48)

# Subtitle text box, directly below the title
top += height
subtitle_textbox = slide0.shapes.add_textbox(left, top, width, height)
subtitle_tf = subtitle_textbox.text_frame
subtitle_tf.text = "Analysis and Visualization"
subtitle_tf.paragraphs[0].alignment = PP_ALIGN.CENTER
subtitle_tf.paragraphs[0].runs[0].font.bold = True
subtitle_tf.paragraphs[0].runs[0].font.size = Pt(36)  # slightly smaller than the title
subtitle_tf.paragraphs[0].runs[0].font.color.rgb = RGBColor(128, 128, 128)  # grey, lighter than the title

# Creation date shown on the slide
now = datetime.now()
created_at = now.strftime("%Y-%m-%d")

# Text box with the creation date and the playlist link
top = Inches(7)
url_textbox = slide0.shapes.add_textbox(left, top, width, height)
url_tf = url_textbox.text_frame
p = url_tf.add_paragraph()
r = p.add_run()
r.text = f"Created at: {created_at}\nPlaylist: "
r.font.bold = True
r.font.size = Pt(32)
r.font.color.rgb = RGBColor(125, 125, 125)  # grey, close to the subtitle colour

# Second run: the visible link text
r = p.add_run()
r.text = "HERE"
r.font.bold = True
r.font.size = Pt(32)
r.font.color.rgb = RGBColor(125, 125, 125)

# Turn the link text into a hyperlink to the playlist URL entered by the user
hyperlink = r.hyperlink
hyperlink.address = url


# グラフの説明をChatGPTが生成する関数
def generate_description(graph_info, data_info, data_sample, sample_size, total_size):
    """Ask ChatGPT for a Japanese bullet-point description of a graph.

    Args:
        graph_info: Short English description of the graph.
        data_info: Description of the plotted data. Accepted for interface
            compatibility; it is currently not included in the prompt.
        data_sample: Text rendering of the sampled rows sent to the model.
        sample_size: Number of rows contained in ``data_sample``.
        total_size: Total number of data points behind the graph.

    Returns:
        The text content of the first choice in the model's reply.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"1. I have a graph which is {graph_info}. The graph is created using all the {total_size} data points. Can you help me generate a bulleted description for the graph, in Japanese? \n2. And give you a sample of {sample_size} data points, which is randomly selected, from a total of {total_size} data points from the graph. Here is the sample data: \n{data_sample}\nCan you help me generate a bulleted prediction based on this sample data, focusing on data analysis, in Japanese? Generated only description."},
    ]

    # openai>=1.0 removed `openai.ChatCompletion`; those versions expose the
    # `OpenAI` class and a module-level `openai.chat.completions` client that
    # reads `openai.api_key`. Older versions only have `ChatCompletion`.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        return response.choices[0].message.content

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    return response.choices[0].message['content']

# Number of sample rows handed to ChatGPT (at most 100)
sample_size = min(len(df), 100)

# Prompt fragments describing each graph
graph_info_list = [
    f"a pairplot of 'Energy vs Tempo' from the Spotify playlist {playlist_name}",
    f"a violin plot of 'Energy by Key' from the Spotify playlist {playlist_name}",
    f"a violin plot of 'Tempo by Key' from the Spotify playlist {playlist_name}"
]

# Output file of each graph
img_file_list = ["graph1.png", "graph2.png", "graph3.png"]

# Per-graph settings:
# (y column of the violin plot or None for the pairplot, plot title, y label,
#  wording used in data_info, columns sampled for ChatGPT)
plot_specs = [
    (None, None, None, 'energy and tempo', ['energy', 'tempo']),
    ('energy', 'Energy by Key', 'Energy', 'energy by key', ['key', 'energy']),
    ('tempo', 'Tempo by Key', 'Tempo', 'tempo by key', ['key', 'tempo']),
]

# Create each graph, its description and its slide
for graph_info, img_path, spec in zip(graph_info_list, img_file_list, plot_specs):
    y_column, plot_title, y_label, value_label, sample_columns = spec

    if y_column is None:
        # pairplot creates its own figure, so no figure is opened beforehand
        data_points = len(df)
        pairplot = sns.pairplot(df[['energy', 'tempo']])
        pairplot.fig.suptitle(f'Energy vs Tempo({playlist_name})\n' + f'Data Points: {data_points}', y=1.02, ha='center', va='center')
    else:
        plt.figure(figsize=(10, 8))
        data_points = df.groupby('key').size().values  # number of tracks per key
        sns.violinplot(x='key', y=y_column, data=df)
        plt.title(f"{plot_title}({playlist_name})")
        plt.xlabel('Key')
        plt.ylabel(y_label)
        plt.xticks(rotation=45)
        plt.annotate('Data Points: ' + str(data_points) + '\nTotal: ' + str(sum(data_points)), xy=(0.5, 0.95), xycoords='axes fraction', ha='center', va='center', bbox=dict(boxstyle='round', facecolor='white'))
    plt.tight_layout()
    plt.savefig(img_path, bbox_inches='tight')
    # Close the figure once it is saved so figures do not pile up in memory
    plt.close()

    # Generate the description of the graph
    data_info = f"{data_points} data points showing the {value_label} values for each track in the playlist '{playlist_name}'"
    data_sample = df[sample_columns].sample(sample_size).to_string()
    description = generate_description(graph_info, data_info, data_sample, sample_size, len(df))

    # Add a new slide (layout index 6, used as an empty canvas)
    slide_layout = prs.slide_layouts[6]
    slide = prs.slides.add_slide(slide_layout)

    # Scale the graph image to 7/9 of the slide height, keeping the aspect ratio
    with Image.open(img_path) as img:
        width, height = img.size
    aspect_ratio = width / height
    new_height = prs.slide_height * 7 / 9
    new_width = new_height * aspect_ratio

    left = Inches(0.5)  # small left margin
    top = (prs.slide_height - new_height) / 2  # vertically centred
    picture = slide.shapes.add_picture(img_path, left, top, new_width, new_height)

    # Text box for the description, to the right of the image
    left = new_width + Inches(1)
    top = (prs.slide_height - new_height) / 2
    width = prs.slide_width - new_width - Inches(2)  # remaining width minus margins
    height = new_height  # same height as the image
    txBox = slide.shapes.add_textbox(left, top, width, height)

    # Wrap the text, centre it vertically and set the font size
    tf = txBox.text_frame
    tf.word_wrap = True
    tf.text = description
    tf.vertical_anchor = MSO_ANCHOR.MIDDLE
    # Apply the size to every run of every paragraph. Indexing only
    # paragraphs[0].runs[0] left all following lines at the default size and
    # failed when the first line of the description was empty.
    for paragraph in tf.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(20)


# 背景色を設定する関数
def set_background_color(slide, color):
    """Fill the background of ``slide`` with one solid colour.

    Args:
        slide: Slide whose background is changed.
        color: Three values that are unpacked into ``RGBColor``.
    """
    bg_fill = slide.background.fill
    bg_fill.solid()  # switch the fill to a solid colour before setting it
    bg_fill.fore_color.rgb = RGBColor(*color)

# Background colour as (R, G, B), here a light blue
color = (230, 240, 255)

# Apply the background colour to every slide
for slide in prs.slides:
    set_background_color(slide, color)


# Save the presentation. Characters that are not allowed in file names (for
# example the "/" in a playlist name) are replaced with "_" so that saving
# does not fail; names without such characters produce the same file name
# as before.
safe_playlist_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in playlist_name)
prs.save(f"Analyzing {safe_playlist_name}.pptx")

# Delete the temporary playlist image and graph images, each only if present
for img_file in ['playlist_image.png', *img_file_list]:
    if os.path.isfile(img_file):
        os.remove(img_file)

実行後にパワーポイント(タイトルスライド＋分析スライド×3)が生成されていたら成功です！

私が作ったComplete Songs of Moonchildのプレイリストだと以下のようになります。

このツールを作っていて面白かったのは、的外れなところはあるものの、サンプルデータをChatGPTに渡すとある程度、ChatGPTがデータ分析を行う事ですね。
サンプルデータは100件以下しか渡さないようにしている事もあり、プレイリストのトラック数が100以下で、トラック数が少ないほど、分析がまともになる気がします。

最後に

SpotifyもOpenAIも出来ることが多くとても楽しいですね。
また何か作ってみます。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up