はじめに
この記事では、MIDIデータをCSVに書き出すために作ったプログラムを紹介します。
単に全データを書き出すだけではなく、
- 拍子・テンポ情報
- ノート情報
- マーカー情報
- テキスト情報
などを分け、わかりやすく保存することを目的としています。MIDIデータを扱うアプリケーションを開発するときの仲介としての位置づけです。
特に、MIDIデルタ時間(プログラム内では「拍子時間」と呼んでいます)から絶対時間(秒)への換算をあらかじめ行っておくことで、アプリケーション側でその計算を行わなくて済むことが利点です。
実装
以下にプログラムを公開しました。
長いですが、メインの処理は以下のような実装になっています。
import py_midicsv as pm
import argparse
import pandas as pd
import os
def midi_to_csv(input_path):
    """Load a MIDI file and return its midicsv records as a DataFrame.

    Each line produced by ``pm.midi_to_csv`` becomes one row and each
    comma-separated field becomes one column (labelled 0, 1, 2, ...).
    All cells are strings with double quotes removed; rows shorter than
    the widest one are padded with ``None`` by pandas.

    Args:
        input_path: Path of the MIDI file, passed through to py_midicsv.

    Returns:
        pandas.DataFrame with one row per midicsv record.
    """
    import csv  # local import: only this function needs it

    csv_lines = pm.midi_to_csv(input_path)
    # Parse with the csv module instead of splitting on ", ": a quoted text
    # field (title, marker, lyric ...) may itself contain ", " and would
    # otherwise be cut into several columns.
    reader = csv.reader(
        (line.strip() for line in csv_lines),
        skipinitialspace=True,
    )
    # Keep stripping any remaining double quotes so that ordinary rows come
    # out exactly as they did before.
    rows = [[field.replace('"', '') for field in record] for record in reader]
    return pd.DataFrame(rows)
def get_time(x):
    """Format a duration in seconds as ``M:SS:CC``.

    ``M`` is whole minutes (not capped at 59), ``SS`` whole seconds and
    ``CC`` hundredths of a second, truncated.

    Args:
        x: Non-negative time in seconds (int or float).

    Returns:
        The formatted string, e.g. ``"1:23:65"`` for 83.657 seconds.
    """
    # Work in whole centiseconds. Rounding to 6 decimals first removes
    # binary floating-point noise (0.29 * 100 == 28.999999999999996), which
    # would otherwise be truncated to one centisecond too few.
    total_centis = int(round(x * 100, 6))
    minutes, rest = divmod(total_centis, 6000)
    seconds, centis = divmod(rest, 100)
    return f"{minutes:0d}:{seconds:02d}:{centis:02d}"
def main(args):
    """Convert one MIDI file into a set of CSV files.

    Files written into ``args.output_dir`` (created if missing):

    * ``all.csv``             - every midicsv record, unprocessed
    * ``beats_and_tempo.csv`` - one row per bar start and per mid-bar tempo
      change, with time signature, tempo, key and absolute time in seconds
    * ``notes.csv``           - one row per note (ticks, bar positions,
      absolute times, octave, pitch name, pitch number, channel index)
    * ``texts.csv`` / ``markers.csv`` - Text_t / Marker_t events with their
      tick, absolute time, formatted time and bar position

    Args:
        args: Object with ``input`` (MIDI path) and ``output_dir``.
    """
    df = midi_to_csv(args.input)
    os.makedirs(args.output_dir, exist_ok=True)

    # TEMP: raw dump of all records
    df.to_csv(os.path.join(args.output_dir, "all.csv"))

    df[1] = df[1].astype(int)  # column 1 is the event time in ticks
    t_last = int(df[1].max())  # latest tick found anywhere in the file

    # ------------------------------------------------------------------
    # Bar grid: [bar number, start tick, numerator, denominator, bar ticks]
    # ------------------------------------------------------------------
    d = 0  # ticks per quarter note, read from the Header record
    beat_n, beat_b = 0, 0
    t_now_d = 0  # tick at which the current time signature started
    t_d_1bar = 0  # ticks per bar under the current time signature
    bar = 0
    bars_data = []

    for _, row in df.iterrows():
        event = row[2]
        if event == "Header":
            d = int(row[5])
        elif event == "Time_signature":
            if t_d_1bar != 0:
                # Emit the bars that elapsed under the previous signature.
                bars = (int(row[1]) - t_now_d) // t_d_1bar
                for i in range(bars):
                    bar += 1
                    bars_data.append([bar, t_now_d + i * t_d_1bar, beat_n, beat_b, t_d_1bar])
            beat_n = int(row[3])
            beat_b = int(2 ** int(row[4]))  # denominator is stored as a power of two
            t_d_1bar = int(d / (beat_b / 4) * beat_n)
            t_now_d = int(row[1])
        elif event == "End_track":
            if t_d_1bar != 0:
                # Cover the whole file, not just this track: other tracks may
                # run past this End_track. 10 spare bars are kept as margin.
                t_end = max(int(row[1]), t_last)
                bars = (t_end - t_now_d) // t_d_1bar
                for i in range(bars + 10):
                    bar += 1
                    bars_data.append([bar, t_now_d + i * t_d_1bar, beat_n, beat_b, t_d_1bar])
            # Only the first track that ends is scanned for time signatures.
            break

    # ------------------------------------------------------------------
    # Tempo table: tick, BPM, and the tick of the next tempo change
    # ------------------------------------------------------------------
    df_tempo = df[df[2] == "Tempo"].copy()
    # Tempo records carry microseconds per quarter note; convert to BPM.
    df_tempo["v"] = df_tempo[3].apply(lambda x: round(60 / (int(x) * (10 ** -6)), 2))
    df_tempo = df_tempo[[1, "v"]].reset_index(drop=True)
    # 9999999 acts as "never" for the last tempo.
    df_tempo["next_change"] = list(df_tempo[1].values[1:]) + [9999999]
    df_tempo["next_change"] = df_tempo["next_change"].astype(int)
    df_tempo[1] = df_tempo[1].astype(int)

    # ------------------------------------------------------------------
    # Merge bars, tempo and key signature
    # rows: [position, tick, numerator, denominator, bar ticks, tempo, key, mode]
    # ------------------------------------------------------------------
    bar_starts = {bar_data[1] for bar_data in bars_data}
    df_key = df[df[2] == "Key_signature"]
    bars_tempo_data = []
    chou = 0  # key signature value (column 3 of Key_signature)
    major = 0  # mode string (column 4 of Key_signature); 0 until one is seen
    for _, tempo_data in df_tempo.iterrows():
        t_now_d = int(tempo_data[1])
        tempo = tempo_data["v"]
        next_change = tempo_data["next_change"]

        if bars_data and t_now_d not in bar_starts:
            # Tempo change inside a bar: add an extra row at that position.
            # Default to the last bar so bar_t is always defined, even when
            # the change lies beyond every known bar start.
            bar_t = len(bars_data) - 1
            for i, bar_data in enumerate(bars_data):
                if bar_data[1] > t_now_d:
                    bar_t = max(i - 1, 0)
                    break
            owner = bars_data[bar_t]
            d_tick = owner[4] / owner[2]  # ticks per beat
            delta = t_now_d - owner[1]
            ticks = int(delta // d_tick) + 1  # 1-based beat number
            d_ = int(delta - ((ticks - 1) * d_tick))
            bars_tempo_data.append([f"{owner[0]}.{ticks}.{d_}", t_now_d, *owner[2:], tempo, chou, major])

        for bar_data in bars_data:
            if t_now_d <= bar_data[1] < next_change:
                key_rows = df_key[df_key[1] == int(bar_data[1])]
                if len(key_rows) == 1:
                    chou = int(key_rows[3].values[0])
                    major = key_rows[4].values[0]
                bars_tempo_data.append([f"{bar_data[0]}.1.0", *bar_data[1:], tempo, chou, major])

    # ------------------------------------------------------------------
    # Absolute time (seconds) of every row, accumulated segment by segment
    # ------------------------------------------------------------------
    bars_tempo_time_data = []
    t_now_s = 0.0
    for i, entry in enumerate(bars_tempo_data):
        if i > 0:
            prev = bars_tempo_data[i - 1]
            # The tempo of the previous row applies up to this row.
            t_now_s += (60.0 / prev[5]) * ((entry[1] - prev[1]) / d)
        bars_tempo_time_data.append([*entry, t_now_s])

    df_hyousi_and_tempo = pd.DataFrame(bars_tempo_time_data, columns=["拍子位置", "時間", "拍子分子", "拍子分母", "1小節長さ", "テンポ", "調", "長調/短調", "絶対時間"])
    df_hyousi_and_tempo.to_csv(os.path.join(args.output_dir, "beats_and_tempo.csv"))

    def get_times_t_d(t_d):
        """Return (absolute seconds, "bar.beat.tick" position) for tick t_d."""
        if not bars_tempo_time_data:
            raise ValueError("no tempo / time-signature data; cannot convert ticks to time")

        # j: last row starting at or before t_d (last row if t_d is past the
        # end, first row if t_d precedes every row).
        j = len(bars_tempo_time_data) - 1
        for k, entry in enumerate(bars_tempo_time_data):
            if entry[1] > t_d:
                j = max(k - 1, 0)
                break
        # j_bar: the bar-start row ("N.1.0") at or before row j.
        j_bar = j
        while j_bar > 0 and ".1.0" not in bars_tempo_time_data[j_bar][0]:
            j_bar -= 1

        ref = bars_tempo_time_data[j]
        t_zettai = ref[8] + (60.0 / ref[5]) * ((t_d - ref[1]) / d)

        bar_ref = bars_tempo_time_data[j_bar]
        bar_no = bar_ref[0].replace(".1.0", "")
        d_delta = t_d - bar_ref[1]
        d_haku = bar_ref[4] / bar_ref[2]  # ticks per beat
        haku = int(d_delta // d_haku)
        remain = int(d_delta - (d_haku * haku))
        return t_zettai, f"{bar_no}.{haku+1}.{remain}"

    # ------------------------------------------------------------------
    # Notes
    # ------------------------------------------------------------------
    # Pitch-class names: sharps for sharp keys, flats for flat keys, and a
    # mixed spelling when the key value is 0.
    pi_dict_normal = {0: "C", 1: "C#", 2: "D", 3: "Eb", 4: "E", 5: "F", 6: "F#", 7: "G", 8: "G#", 9: "A", 10: "Bb", 11: "B"}
    pi_dict_sharp = {0: "C", 1: "C#", 2: "D", 3: "D#", 4: "E", 5: "F", 6: "F#", 7: "G", 8: "G#", 9: "A", 10: "A#", 11: "B"}
    pi_dict_flat = {0: "C", 1: "Db", 2: "D", 3: "Eb", 4: "E", 5: "F", 6: "Gb", 7: "G", 8: "Ab", 9: "A", 10: "Bb", 11: "B"}

    # One pass over the note events, in file order. A Note_on_c with
    # velocity 0 is a note-off in MIDI, so it ends a note instead of
    # starting one.
    note_ons = []  # (tick, channel, pitch)
    note_offs = {}  # (channel, pitch) -> ticks of note-offs
    for _, row in df[df[2].isin(["Note_on_c", "Note_off_c"])].iterrows():
        key = (int(row[3]), int(row[4]))
        if row[2] == "Note_on_c" and int(row[5]) > 0:
            note_ons.append((int(row[1]), *key))
        else:
            note_offs.setdefault(key, []).append(int(row[1]))

    notes_data = []
    for t_note_on, channel, pitch in note_ons:
        # First note-off (in file order) strictly after the note-on; a note
        # that is never released is held until the last tick of the file.
        t_note_off = next(
            (t for t in note_offs.get((channel, pitch), []) if t > t_note_on),
            t_last,
        )
        t_zettai_note_on, hyousi_note_on = get_times_t_d(t_note_on)
        t_zettai_note_off, hyousi_note_off = get_times_t_d(t_note_off)

        oc = pitch // 12 - 1
        # Key in force at the note-on. "<=" so that a note sitting exactly on
        # a row (including tick 0) uses that row instead of failing.
        key_rows = df_hyousi_and_tempo[df_hyousi_and_tempo["時間"] <= t_note_on]
        _chou = key_rows.iloc[-1]["調"] if not key_rows.empty else 0
        if _chou > 0:
            pi_dict = pi_dict_sharp
        elif _chou < 0:
            pi_dict = pi_dict_flat
        else:
            pi_dict = pi_dict_normal
        pi = pi_dict[pitch % 12]
        notes_data.append([t_note_on, t_note_off, hyousi_note_on, hyousi_note_off, t_zettai_note_on, t_zettai_note_off, oc, pi, pitch, channel])

    df_notes = pd.DataFrame(notes_data)
    if not df_notes.empty:
        # Renumber channels as 0..k-1 in ascending order. Every channel is
        # mapped, so files with more than four channels no longer fail.
        channels = sorted(df_notes[9].unique())
        df_notes[9] = df_notes[9].apply(lambda c: channels.index(c))
    df_notes.to_csv(os.path.join(args.output_dir, "notes.csv"))

    # ------------------------------------------------------------------
    # Text and marker events (same row layout, different record type)
    # ------------------------------------------------------------------
    def export_text_events(event_name, file_name):
        """Write all records of type event_name to file_name."""
        text_data = []
        for _, row in df[df[2] == event_name].iterrows():
            t = int(row[1])
            t_zettai, hyousi = get_times_t_d(t)
            text_data.append([t, t_zettai, get_time(t_zettai), hyousi, row[3]])
        pd.DataFrame(text_data).to_csv(os.path.join(args.output_dir, file_name))

    export_text_events("Text_t", "texts.csv")
    export_text_events("Marker_t", "markers.csv")
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, run the conversion and
    # report the outcome through both a message and the exit status.
    import sys  # needed only here, for stderr and the exit status

    parser = argparse.ArgumentParser(description='MIDI to CSV utility based on py_midicsv library')
    parser.add_argument('-i', '--input', required=True, help="Input path")
    parser.add_argument('-o', '--output_dir', default="output", help="Output directory")
    args = parser.parse_args()
    try:
        main(args)
    except Exception as e:
        # Top-level boundary: report the failure on stderr and exit non-zero
        # so shells and calling scripts can detect it.
        print(f"Error has occurred: {e}", file=sys.stderr)
        sys.exit(1)
    else:
        print("Converted.")
実行結果のサンプル
以下のようなCSVデータが得られます。
- beats_and_tempo.csv:各小節の始めの位置、変更したテンポの位置などが格納されます。
,拍子位置,時間(秒),拍子分子,拍子分母,1小節長さ,テンポ,調,長調/短調,絶対時間(秒)
0,1.1.0,0,4,4,1920,175.0,-6,major,0.0
1,2.1.0,1920,4,4,1920,175.0,-6,major,1.3714285714285714
2,3.1.0,3840,4,4,1920,175.0,-6,major,2.742857142857143
3,4.1.0,5760,4,4,1920,175.0,-6,major,4.114285714285714
4,5.1.0,7680,4,4,1920,175.0,-6,major,5.485714285714286
5,6.1.0,9600,4,4,1920,175.0,-6,major,6.857142857142858
6,7.1.0,11520,4,4,1920,175.0,-6,major,8.22857142857143
7,8.1.0,13440,4,4,1920,175.0,-6,major,9.600000000000001
8,9.1.0,15360,4,4,1920,175.0,-6,major,10.971428571428573
- markers.csv:小節単位のマーカー情報が格納されます。
,拍子位置,時間(秒),時間,拍子位置,マーカー
0,5760,4.114285714285714,0:04:11,4.1.0,I
1,13440,9.600000000000001,0:09:60,8.1.0,I-A1
2,28800,20.57142857142857,0:20:57,16.1.0,A1
3,59520,42.51428571428572,0:42:51,32.1.0,B1
4,74880,53.48571428571431,0:53:48,40.1.0,S1
5,117120,83.65714285714283,1:23:65,62.1.0,S1-A2
6,128640,91.88571428571423,1:31:88,68.1.0,A2
7,144000,102.85714285714276,1:42:85,76.1.0,B2
- notes.csv:ノート情報が格納されます。
,Note ON拍子位置,Note OFF拍子位置,Note ON時間(秒),Note OFF時間(秒),オクターブ,音階,音高,チャンネル
0,5760,6235,4.1.0,4.1.475,4.114285714285714,4.453571428571428,5,Bb,82,0
1,6240,6478,4.2.0,4.2.238,4.457142857142856,4.627142857142857,5,Eb,75,0
2,6720,7195,4.3.0,4.3.475,4.8,5.139285714285714,5,Ab,80,0
3,7200,7438,4.4.0,4.4.238,5.142857142857142,5.312857142857142,5,Db,73,0
4,7440,7915,4.4.240,5.1.235,5.314285714285714,5.6535714285714285,5,Ab,80,0
5,7920,8158,5.1.240,5.1.478,5.6571428571428575,5.827142857142857,5,Db,73,0
- texts.csv:コード等任意の位置に設定したテキスト情報が格納されます。
,拍子位置,時間(秒),時間,拍子位置,テキスト
0,5760,4.114285714285714,0:04:11,4.1.0,Cdim
1,6480,4.628571428571428,0:04:62,4.2.240,B
2,7440,5.314285714285714,0:05:31,4.4.240,Bbm
3,8400,6.0,0:06:00,5.2.240,Gbm/A
4,9600,6.857142857142858,0:06:85,6.1.0,Abm7