More than 1 year has passed since last update.

プログラミング (Python) で作曲してみた

Last updated at 2022-07-20 / Posted at 2022-03-31

はじめに

本日（3月31日）は、バッハの誕生日！
ということで、Magenta の Polyphony RNN を使えば、バッハ風の曲を誰でもつくることができます。なので、Pythonでヴァンパイアという曲を学習させて、クラシック調（バッハ風）のオリジナル曲に挑戦してみました。

プログラム

Google Colab で触れるようにしていますので、良ければご活用ください
https://colab.research.google.com/github/TakayukiNJ/vanpireBach/blob/main/vanpireBach.ipynb

YouTubeでの解説：https://youtu.be/bnilVY-H5xE
※ 2022年4月1日18時に公開

GANSynth で音色を変えてみる

まずは使用するライブラリのインストール

# System packages: FluidSynth runtime, a General MIDI soundfont, and the
# build/audio development headers (-qq keeps apt output quiet).
!apt-get update -qq && apt-get install -qq libfluidsynth1 fluid-soundfont-gm build-essential libasound2-dev libjack-dev
# Python packages for FluidSynth bindings and MIDI handling (-q quiet, -U upgrade).
!pip install -qU pyfluidsynth pretty_midi
# Magenta itself.
!pip install -qU magenta

次に、ライブラリの導入のプログラム

import numpy as np
import matplotlib.pyplot as plt
from google.colab import files
import tensorflow.compat.v1 as tf
import librosa
import magenta.music as mm
from magenta.models.gansynth.lib import flags as lib_flags
from magenta.models.gansynth.lib import generate_util as gu
from magenta.models.gansynth.lib import model as lib_model
from magenta.models.gansynth.lib import util
from note_seq.notebook_utils import colab_play as play
import note_seq

次に、曲生成の設定（４つの関数）

BATCH_SIZE = 16  # number of items processed at once (batch size)
SR = 16000  # audio sampling rate (samples per second)

def load_midi(midi_path, min_pitch=36, max_pitch=84):
  """Read a MIDI file and collect the notes whose pitch is in range.

  Args:
    midi_path: Path to the MIDI file; expanded with `util.expand_path`.
    min_pitch: Lowest MIDI pitch to keep (inclusive).
    max_pitch: Highest MIDI pitch to keep (inclusive).

  Returns:
    A tuple `(ns, notes)`. `ns` is the unfiltered sequence returned by
    `note_seq.midi_file_to_sequence_proto`. `notes` is a dict of NumPy arrays
    ('pitches', 'velocities', 'start_times', 'end_times') restricted to the
    notes with `min_pitch <= pitch <= max_pitch`.
  """
  ns = note_seq.midi_file_to_sequence_proto(util.expand_path(midi_path))

  def column(attr):
    # One array per note attribute, in the order the notes appear in `ns`.
    return np.array([getattr(note, attr) for note in ns.notes])

  pitches = column('pitch')
  in_range = (pitches >= min_pitch) & (pitches <= max_pitch)
  notes = {
      'pitches': pitches[in_range],
      'velocities': column('velocity')[in_range],
      'start_times': column('start_time')[in_range],
      'end_times': column('end_time')[in_range],
  }
  return ns, notes

def get_envelope(t_note_length, t_attack=0.010, t_release=0.3, sr=16000):
  """Create an attack-sustain-release amplitude envelope.

  The envelope ramps linearly from 0 to 1 during the attack, stays at 1 for
  the rest of the note, then ramps linearly from 1 to 0 during the release.
  The release is appended after the note, so the result has
  `int(sr * t_note_length) + int(sr * t_release)` samples.

  Args:
    t_note_length: Note duration in seconds. Clamped to the range [0, 3].
    t_attack: Attack time in seconds.
    t_release: Release time in seconds.
    sr: Integer, sample rate.

  Returns:
    envelope: 1-D float array of gain values in [0, 1].
  """
  # Negative durations (malformed notes) are treated as zero-length so the
  # release slice below always matches the ramp length. The 3-second cap
  # presumably reflects the length of the generated note audio -- TODO confirm.
  t_note_length = min(max(t_note_length, 0.0), 3.0)
  i_sustain = int(sr * t_note_length)
  i_release = int(sr * t_release)
  i_tot = i_sustain + i_release  # attack envelope doesn't add to sound length
  # The attack ramp can never be longer than the envelope it is written into.
  i_attack = min(int(sr * t_attack), i_tot)
  envelope = np.ones(i_tot)
  # Linear attack
  envelope[:i_attack] = np.linspace(0.0, 1.0, i_attack)
  # Linear release (overwrites the attack where the two overlap)
  envelope[i_sustain:] = np.linspace(1.0, 0.0, i_release)
  return envelope

def combine_notes(audio_notes, start_times, end_times, velocities, sr=16000):
  """Combine audio from multiple notes into a single audio clip.

  Each note is shaped by an amplitude envelope, peak-normalized, scaled by its
  velocity, and added into the clip at its start time. The finished clip is
  peak-normalized and then halved.

  Args:
    audio_notes: Array of audio [n_notes, audio_samples].
    start_times: Array of note starts in seconds [n_notes].
    end_times: Array of note ends in seconds [n_notes].
    velocities: Array of MIDI velocities [n_notes]; each note is scaled by
      velocity / 127.
    sr: Integer, sample rate.

  Returns:
    audio_clip: Array of combined audio clip [audio_samples]
  """
  # Leave 3 seconds of headroom after the last note end; the length is
  # truncated to whole seconds before being converted to samples.
  clip_length = end_times.max() + 3.0
  audio_clip = np.zeros(int(clip_length) * sr)

  for note, t_start, t_end, vel in zip(
      audio_notes, start_times, end_times, velocities):
    # Generate an amplitude envelope. Forward `sr` so the envelope length
    # matches this clip's sample rate.
    envelope = get_envelope(t_end - t_start, sr=sr)
    length = len(envelope)
    audio_note = note[:length] * envelope
    # Normalize; skip when there is no positive peak so a silent note does
    # not turn into NaN/inf and poison the whole clip.
    peak = audio_note.max()
    if peak > 0:
      audio_note /= peak
    audio_note *= (vel / 127.0)
    # Add to clip buffer
    clip_start = int(t_start * sr)
    audio_clip[clip_start:clip_start + length] += audio_note

  # Normalize (same zero-peak guard as above), then halve the level.
  peak = audio_clip.max()
  if peak > 0:
    audio_clip /= peak
  audio_clip /= 2.0
  return audio_clip

def specplot(audio_clip, min_pitch=36, max_pitch=84):
  """Plot a constant-Q power spectrogram of an audio clip.

  Args:
    audio_clip: Audio signal passed to `librosa.cqt` with the module-level
      sample rate `SR`.
    min_pitch: MIDI pitch whose frequency is the lowest CQT bin.
    max_pitch: MIDI pitch used for the upper bound; the analysis extends to
      twice this pitch's frequency (one octave above).

  Returns:
    None. The spectrogram is drawn with `plt.matshow` and the axis ticks are
    removed.
  """
  f_min = librosa.midi_to_hz(min_pitch)
  f_max = 2 * librosa.midi_to_hz(max_pitch)
  # Whole number of octaves needed to span [f_min, f_max].
  octaves = int(np.ceil(np.log2(f_max) - np.log2(f_min)))
  bins_per_octave = 36
  n_bins = int(bins_per_octave * octaves)
  C = librosa.cqt(audio_clip, sr=SR, hop_length=2048, fmin=f_min,
                  n_bins=n_bins, bins_per_octave=bins_per_octave)
  # Power in dB; the small offset avoids log10(0).
  power = 10 * np.log10(np.abs(C)**2 + 1e-6)
  # Reverse the row order (first axis) and drop the first/last two columns
  # (presumably edge frames -- TODO confirm).
  plt.matshow(power[::-1, 2:-2], aspect='auto', cmap=plt.cm.magma)
  plt.yticks([])
  plt.xticks([])

次に、学習済みモデルを読み込み

tf.disable_v2_behavior()  # run TensorFlow 1.x-style code under TensorFlow 2
tf.reset_default_graph()  # reset the default TensorFlow graph

# Path of the pretrained GANSynth model ("acoustic_only").
model_dir = "gs://magentadata/models/gansynth/acoustic_only"
# Flags handed to the model loader; the batch size comes from BATCH_SIZE.
flags = lib_flags.Flags({
    "batch_size_schedule": [BATCH_SIZE],
    "tfds_data_dir": "gs://tfds-data/datasets",
})
model = lib_model.Model.load_from_path(model_dir, flags)

次に、ファイルの読み込み（Google Colab を使用していることを想定）

# Path of the input MIDI file (assumes it is placed under /content on Colab).
midi_path = "/content/Vampire_Melo_164BPM.mid"
# ns: the full sequence; notes: per-note arrays filtered by pitch range.
ns, notes = load_midi(midi_path)

# Visualize the loaded sequence, then play it back through FluidSynth.
note_seq.plot_sequence(ns)
note_seq.play_sequence(ns, synth=note_seq.fluidsynth)

次に、音色を変更

seconds_per_instrument = 5  # interval at which the instrument changes
z_instruments, t_instruments = gu.get_random_instruments(  # latent vectors and their times
    # the latent vectors change randomly and slowly over time
    model,
    notes["end_times"][-1],
    secs_per_instrument=seconds_per_instrument)

z_notes = gu.get_z_notes(notes["start_times"], z_instruments, t_instruments) # get the latent vector for each note

audio_notes = model.generate_samples_from_z(z_notes, notes["pitches"]) # generate audio for each note

audio = combine_notes(
    # mix all notes into a single audio clip
    audio_notes,
    notes["start_times"],
    notes["end_times"],
    notes["velocities"]
    )

specplot(audio)  # show the spectrogram
play(audio, sample_rate=SR)

完成サンプル（3分）

音声をwavデータに変換してダウンロード

# Write the generated audio to a wav file and download it from Colab.
file_name = "atashiVanpire.wav"
gu.save_wav(audio, file_name)
files.download(file_name)

Polyphony RNN で作曲してみる

変数 ns の中身が良い感じに使えそうなのを確認

print(ns)  # dump the sequence returned by load_midi to inspect its contents

次に、Polyphony RNN を使ってバッハ風の曲を作成（※ 以下のコードで実際に読み込んでいるのは Performance RNN の performance_with_dynamics モデルです）

from magenta.models.performance_rnn import performance_sequence_generator
from magenta.models.shared import sequence_generator_bundle

note_seq.notebook_utils.download_bundle("performance_with_dynamics.mag", "/models/")  # download the bundle (.mag file)
bundle = sequence_generator_bundle.read_bundle_file("/models/performance_with_dynamics.mag")  # read the bundle
generator_map = performance_sequence_generator.get_generator_map()
performance_rnn = generator_map["performance_with_dynamics"](checkpoint=None, bundle=bundle)  # configure the generator from the bundle
performance_rnn.initialize()  # initialize the generator

さいごに、オリジナル曲を作成

from note_seq.protobuf import generator_pb2

total_time = 360 # length of the piece (seconds)
temperature = 1 # constant controlling how random the piece is

base_end_time = max(note.end_time for note in ns.notes)  # end time of the base piece

generator_options = generator_pb2.GeneratorOptions()  # options for the generator
generator_options.args["temperature"].float_value = temperature  # randomness
# NOTE(review): the generated section runs from base_end_time to total_time,
# so this presumably requires base_end_time < total_time -- confirm.
generator_options.generate_sections.add(
    start_time=base_end_time,  # time at which generation starts
    end_time=total_time)  # time at which generation ends

gen_seq = performance_rnn.generate(ns, generator_options) # generate the piece

note_seq.plot_sequence(gen_seq)  # visualize the NoteSequence
note_seq.play_sequence(gen_seq, synth=note_seq.fluidsynth)  # play the NoteSequence

MIDIデータとしてダウンロード

from google.colab import files
note_seq.sequence_proto_to_midi_file(gen_seq, "atashiBach.mid") # convert to MIDI data and save
files.download("atashiBach.mid") # download the file

完成サンプル（3分）

参考文献まとめ

◆ Magenta (GoogleのAI) のサンプルコード
https://colab.research.google.com/notebooks/magenta/hello_magenta/hello_magenta.ipynb

◆ GANSynth (音色を変えるAI) https://colab.research.google.com/notebooks/magenta/gansynth/gansynth_demo.ipynb

◆ 音色を変更したサンプル音源
https://youtu.be/cjNFTLG4EUQ

◆ Polyphony RNN (バッハ風の曲を作成できるAI) https://github.com/magenta/magenta/tree/main/magenta/models/polyphony_rnn

◆ 参考にしたGitHub
https://github.com/yukinaga/ai_music

◆ 実際に作曲してみたサンプル音源
https://youtu.be/Bzk6Et16KHE

◆ ヴァンパイアのMIDIファイルのダウンロード先
https://otoiro.co.jp/special/

◆ ヴァンパイア本家様のYouTube
https://www.youtube.com/watch?v=e1xCOsgWG0M

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up