音楽を常時流すから、その音楽に反応するオーディオスペクトラムを作ってブチ上がろう計画

Last updated at 2024-04-30 / Posted at 2024-04-30

はじめに

備忘録的側面を多分に含む。

必要なものをインストール

VB-CABLE
Voicemeeter Banana

Windowsの「設定」→「サウンド」の下部にある「サウンドの詳細設定」をクリックし、サウンドコントロールパネルを表示する。ここで「再生」タブの「Voicemeeter AUX Input」を既定のデバイスに設定する。

オーディオスペクトラム実装

下記URLの記事を参考にさせていただきました。
https://qiita.com/akira2768922/items/72a1830fb5f84cc325fd

ソースコード↓

audio.py

import pyaudio
import numpy as np
import cv2

SAMPLE_RATE = 44100             # sampling rate in Hz
FRAME_SIZE = 2048               # number of samples per frame
INT16_MAX = 32767               # divisor used to normalise int16 sample values
SAMPLING_SIZE = FRAME_SIZE * 4  # length of the sample window passed to the FFT
WIDTH = 800     # width of the display area
HEIGHT = 600    # height of the display area

# spectram_array is a boolean (band x FFT bin) matrix: row k selects the bins whose
# |frequency| is in (spectram_range[k-1], spectram_range[k]]; row 0 has no lower edge.
# Band edges are 1/10 octave apart, from Nyquist / 2**10 up to Nyquist (21 Hz - 22,050 Hz).
spectram_range = [int((SAMPLE_RATE / 2) / 2 ** (i/10)) for i in range(100, -1, -1)]
freq = np.abs(np.fft.fftfreq(SAMPLING_SIZE, d=(1/SAMPLE_RATE)))  # |frequency| of every FFT bin
band_upper = np.array(spectram_range, dtype=float).reshape(-1, 1)
band_lower = np.vstack(([[-np.inf]], band_upper[:-1]))  # -inf keeps row 0 open at the bottom
spectram_array = (freq > band_lower) & (freq <= band_upper)  # one broadcast, no per-row np.append

# Display variables
part_w = WIDTH / len(spectram_range)    # horizontal space available per band
part_h = HEIGHT / 100
img = np.full((HEIGHT, WIDTH, 3), 0, dtype=np.uint8)    # frame buffer, initially all zeros



# Capture internal audio: set input_device_index to this machine's "CABLE Output (VB-Audio Virtual" index.
audio = pyaudio.PyAudio()
stream = None
try:
    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=SAMPLE_RATE, input=True,
                        input_device_index=4, frames_per_buffer=FRAME_SIZE)

    # Sliding window holding the most recent SAMPLING_SIZE samples (starts as silence).
    sampling_data = np.zeros(SAMPLING_SIZE)
    while True:
        # Read one frame. Overflow is tolerated: if drawing stalls (e.g. while the
        # window is being dragged) samples are dropped instead of raising.
        frame = stream.read(FRAME_SIZE, exception_on_overflow=False)
        # Normalise the int16 samples, append them, and keep only the newest SAMPLING_SIZE.
        frame_data = np.frombuffer(frame, dtype="int16") / INT16_MAX
        sampling_data = np.concatenate([sampling_data, frame_data])[-SAMPLING_SIZE:]

        # Fast Fourier transform; keep the magnitude of every bin.
        fft = np.abs(np.fft.fft(sampling_data))

        # Sum the bin magnitudes that belong to each band (one value per bar).
        spectram_data = np.dot(spectram_array, fft)

        # Clear the frame buffer, then draw one solid bar per band.
        img[:] = 0
        for index, value in enumerate(spectram_data):
            # Bars grow upwards from the bottom edge; the top is clamped to the image.
            cv2.rectangle(img,
                          (int(part_w * (index + 0) + 1), int(HEIGHT)),
                          (int(part_w * (index + 1) - 1), int(max(HEIGHT - value/4, 0))),
                          (255, 0, 0), thickness=-1)
        cv2.imshow("AudioSpectrum", img)

        # Quit on 'q', 'Q' or Esc.
        key = cv2.waitKey(1) & 0xFF
        if key in (ord('q'), ord('Q'), 0x1b):
            break
finally:
    # Always release the audio device and the window, even on an error or Ctrl+C.
    if stream is not None:
        stream.stop_stream()
        stream.close()
    audio.terminate()
    cv2.destroyAllWindows()

audio.pyの33行目（audio.open()の引数）にあるinput_device_indexには、「CABLE Output (VB-Audio Virtual」というデバイスの番号（ここでは仮にX_NUMとする）を指定すること。

X_NUMを求めるコード

index_num.py

import pyaudio

# Print the index and name of every audio device PyAudio reports, so the index to
# pass as input_device_index can be looked up.
pa = pyaudio.PyAudio()
try:
    for i in range(pa.get_device_count()):
        print(i)
        print(pa.get_device_info_by_index(i)['name'])
        print('_____________________')
finally:
    # Release PortAudio even if a device query fails.
    pa.terminate()

実行すると、デバイスの番号と名前の一覧がコンソールに表示される。その中に「CABLE Output (VB-Audio Virtual」があるので、その番号をinput_device_indexに指定する。

Voicemeeter Bananaのアプリを開いて、右上のA1、A2に、ライン(ここではAG03となっているはず)とCABLE Outputをそれぞれ指定する。

最後に

上記の手順を踏むことで実現可能なはずである。

改良したコード

上記は単色(青)かつ、横並びの棒グラフであったが、このコードは円形かつレインボーで表示される。見栄えはこちらのほうが圧倒的に良いはずである。

audio_kai.py

import pyaudio
import numpy as np
import cv2

SAMPLE_RATE = 44100             # sampling rate in Hz
FRAME_SIZE = 2048               # number of samples per frame
INT16_MAX = 32767               # divisor used to normalise int16 sample values
SAMPLING_SIZE = FRAME_SIZE * 4  # length of the sample window passed to the FFT
WIDTH = 800     # width of the display area (default: 800)
HEIGHT = 800    # height of the display area (default: 600)

# spectram_array is a boolean (band x FFT bin) matrix: row k selects the bins whose
# |frequency| is in (spectram_range[k-1], spectram_range[k]]; row 0 has no lower edge.
# Band edges are 1/10 octave apart, from Nyquist / 2**10 up to Nyquist (21 Hz - 22,050 Hz).
spectram_range = [int((SAMPLE_RATE / 2) / 2 ** (i/10)) for i in range(100, -1, -1)]
freq = np.abs(np.fft.fftfreq(SAMPLING_SIZE, d=(1/SAMPLE_RATE)))  # |frequency| of every FFT bin
band_upper = np.array(spectram_range, dtype=float).reshape(-1, 1)
band_lower = np.vstack(([[-np.inf]], band_upper[:-1]))  # -inf keeps row 0 open at the bottom
spectram_array = (freq > band_lower) & (freq <= band_upper)  # one broadcast, no per-row np.append

# Display variables
part_w = WIDTH / len(spectram_range)
part_h = HEIGHT / 100
img = np.full((HEIGHT, WIDTH, 3), 0, dtype=np.uint8)    # frame buffer, initially all zeros

# Start sampling from the input device (input_device_index is machine-specific).
audio = pyaudio.PyAudio()
stream = None
try:
    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=SAMPLE_RATE, input=True,
                        input_device_index=4, frames_per_buffer=FRAME_SIZE)

    # Per-band geometry and colours never change between frames, so compute them once.
    band_count = spectram_array.shape[0]
    radius = 130    # radius of the base circle (default: 80)
    center_x = WIDTH / 2
    center_y = HEIGHT / 2
    # Band k sits at angle 2*pi*k/band_count, measured clockwise from the top.
    angles = (2 * np.pi) * (np.arange(band_count) / band_count)
    sin_a = np.sin(angles)
    cos_a = np.cos(angles)
    # Rainbow palette: hue spread over 0-179 (OpenCV's 8-bit hue range) at full
    # saturation and value, converted to BGR in a single call.
    hsv = np.full((1, band_count, 3), 255, dtype=np.uint8)
    hsv[0, :, 0] = [int((i / band_count) * 179) for i in range(band_count)]
    colors = [tuple(int(c) for c in bgr)
              for bgr in cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)[0]]

    # Sliding window holding the most recent SAMPLING_SIZE samples (starts as silence).
    sampling_data = np.zeros(SAMPLING_SIZE)
    while True:
        # Read one frame. Overflow is tolerated: if drawing stalls (e.g. while the
        # window is being dragged) samples are dropped instead of raising.
        frame = stream.read(FRAME_SIZE, exception_on_overflow=False)
        # Normalise the int16 samples, append them, and keep only the newest SAMPLING_SIZE.
        frame_data = np.frombuffer(frame, dtype="int16") / INT16_MAX
        sampling_data = np.concatenate([sampling_data, frame_data])[-SAMPLING_SIZE:]

        # Fast Fourier transform; keep the magnitude of every bin.
        fft = np.abs(np.fft.fft(sampling_data))

        # Sum the bin magnitudes that belong to each band (one value per spoke).
        spectram_data = np.dot(spectram_array, fft)

        # Clear the frame buffer, then draw two rainbow-coloured spokes per band.
        img[:] = 0
        for index, value in enumerate(spectram_data):
            # Point on the base circle where both spokes start.
            base = (int(center_x + sin_a[index] * radius),
                    int(center_y - cos_a[index] * radius))

            # Outward spoke, length value/4.
            outer = radius + value/4
            tip_out = (int(center_x + sin_a[index] * outer),
                       int(center_y - cos_a[index] * outer))
            cv2.line(img, base, tip_out, colors[index], thickness=2)

            # Inward spoke, length value/24. Written relative to radius so the spoke
            # still starts on the circle when radius is changed from 130.
            inner = radius - value/24
            tip_in = (int(center_x + sin_a[index] * inner),
                      int(center_y - cos_a[index] * inner))
            cv2.line(img, base, tip_in, colors[index], thickness=2)

        cv2.imshow("AudioSpectrum", img)

        # Quit on 'q', 'Q' or Esc.
        key = cv2.waitKey(1) & 0xFF
        if key in (ord('q'), ord('Q'), 0x1b):
            break
finally:
    # Always release the audio device and the window, even on an error or Ctrl+C.
    if stream is not None:
        stream.stop_stream()
        stream.close()
    audio.terminate()
    cv2.destroyAllWindows()

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up