LoginSignup
22
19

【Python】MIDIデータをイベント種類別にCSVに変換するコマンドラインアプリ

Last updated at Posted at 2023-12-09

はじめに

この記事は、MIDIデータをCSVに書き出すために作ったプログラムです。
単に全データを書き出すだけではなく、

  • 拍子・テンポ情報
  • ノート情報
  • マーカー情報
  • テキスト情報

などを分け、わかりやすく保存することを目的としています。MIDIデータを扱うアプリケーションを開発するときの仲介としての位置づけです。

特に、MIDIデルタ時間(プログラム内では「拍子時間」と呼んでいます)から絶対時間(~~秒)の計算をあらかじめ行うことで、アプリケーション側でその計算を行わなくてよいことが利点です。

実装

以下にプログラムを公開しました。

長いですが、メインの処理は以下のような実装になっています。

import py_midicsv as pm
import argparse
import pandas as pd
import os


def midi_to_csv(input_path):
    csv_string = pm.midi_to_csv(input_path)
    df = pd.DataFrame([data.strip().split(", ") for data in csv_string]).map(lambda x: x.replace('"', '') if type(x) == str else x)
    return df


def get_time(x):
    m = int(x // 60)
    s = x - (m * 60)
    s_f = int(s // 1)
    ss = int((s - s_f) * 100)
    return f"{m:0d}:{s_f:02d}:{ss:02d}"


def main(args):
    df = midi_to_csv(args.input)
    os.makedirs(args.output_dir, exist_ok=True)
    
    # TEMP
    df.to_csv(os.path.join(args.output_dir, "all.csv"))
    df[1] = df[1].astype(int)

    d = 0
    beat_n, beat_b = 0, 0
    tempo = 0.0

    t_now_d = 0 
    t_now_s = 0.0
    t_d_1bar = 0
    bar = 0

    bars_data = []

    # Extract data except notes
    for idx, row in df.iterrows():
        # ===== Header =====
        if row[2] == "Header":
            d = int(row[5])
            continue
        
        elif row[2] == "Title_t":
            title = str(row[3])
            continue
        
        # ===== Content =====
        if row[2] == "Tempo":
            tempo = float(60 / (int(row[3]) * (10 ** -6)))
        
        elif row[2] == "Time_signature":
            if t_d_1bar != 0:
                t_delta = int(row[1]) - t_now_d
                bars = t_delta // t_d_1bar
                for i in range(bars):
                    bar += 1
                    bars_data.append([bar, t_now_d + i * t_d_1bar, beat_n, beat_b, t_d_1bar])
            
            beat_n = int(row[3])
            beat_b = int(2 ** int(row[4]))
            t_d_1bar = int(d / (beat_b / 4) * beat_n)
            t_now_d = int(row[1])
            
        elif row[2] == "End_track":
            if t_d_1bar != 0:
                t_delta = int(row[1]) - t_now_d
                bars = t_delta // t_d_1bar
                for i in range(bars + 10):
                    bar += 1
                    bars_data.append([bar, t_now_d + i * t_d_1bar, beat_n, beat_b, t_d_1bar])
            
            t_now_d = int(row[1])
            break

    df_tempo = df[df[2] == "Tempo"].copy()
    df_tempo["v"] = df_tempo[3].apply(lambda x: round(60 / (int(x) * (10 ** -6)), 2))
    df_tempo = df_tempo[[1, "v"]].reset_index(drop=True)
    df_tempo["next_change"] = list(df_tempo[1].values[1:]) + [9999999]
    df_tempo["next_change"] = df_tempo["next_change"].astype(int)
    df_tempo[1] = df_tempo[1].astype(int)

    bars_tempo_data = []
    tempo = 0.0
    t_now_d = 0
    chou = 0
    major = 0

    for idx, tempo_data in df_tempo.iterrows():    
        t_now_d = int(tempo_data[1])
        tempo = tempo_data["v"]
        next_change = tempo_data["next_change"]
        
        if not t_now_d in [data[1] for data in bars_data]:
            for i in range(len(bars_data)):
                if bars_data[i][1] > t_now_d:
                    bar_t = i - 1
                    break
            
            d_tick = bars_data[bar_t][4] / bars_data[bar_t][2]
            delta = t_now_d - bars_data[bar_t][1]
            ticks = int(delta // d_tick) + 1
            d_ = int(delta - ((ticks - 1) * d_tick))
            b_0 = bars_data[bar_t][0]
            b_1 = ticks
            bars_tempo_data.append([f"{b_0}.{b_1}.{d_}", t_now_d, *bars_data[bar_t][2:], tempo, chou, major])
        
        for bar_data in bars_data:
            if t_now_d <= bar_data[1] < next_change:
                if len(df[(df[1] == int(bar_data[1])) & (df[2] == "Key_signature")]) == 1:
                    chou = int(df[(df[1] == int(bar_data[1])) & (df[2] == "Key_signature")][3].values[0])
                    major = df[(df[1] == int(bar_data[1])) & (df[2] == "Key_signature")][4].values[0]
                bars_tempo_data.append([f"{bar_data[0]}.1.0", *bar_data[1:], tempo, chou, major])
        
    bars_tempo_time_data = []
    t_now_s = 0.0

    for i in range(len(bars_tempo_data)):
        if i == 0:
            bars_tempo_time_data.append([*bars_tempo_data[i], 0.0])
            continue
        
        d_delta = bars_tempo_data[i][1] - bars_tempo_data[i - 1][1]
        tempo = bars_tempo_data[i - 1][5]
        
        haku = d_delta / d
        t_s_per_4 = 60.0 / tempo
        time = t_s_per_4 * haku
        t_now_s += time
        
        bars_tempo_time_data.append([*bars_tempo_data[i], t_now_s])
            
    df_hyousi_and_tempo = pd.DataFrame(bars_tempo_time_data, columns=["拍子位置", "時間", "拍子分子", "拍子分母", "1小節長さ", "テンポ", "調", "長調/短調", "絶対時間"])
    df_hyousi_and_tempo.to_csv(os.path.join(args.output_dir, "beats_and_tempo.csv"))

    # Calc time
    def get_times_t_d(t_d):
        j_bar = 0
        j = 0
        for j in range(len(bars_tempo_time_data)):
            if bars_tempo_time_data[j][1] > t_d:
                j -= 1
                j_bar = j
                while not ".1.0" in bars_tempo_time_data[j_bar][0]:
                    j_bar -= 1
                break

        d_delta = t_d - bars_tempo_time_data[j][1]
        tempo = bars_tempo_time_data[j][5]
        haku = d_delta / d
        t_s_per_4 = 60.0 / tempo
        time_s = t_s_per_4 * haku

        t_zettai =  bars_tempo_time_data[j][8] + time_s
        
        bar = bars_tempo_time_data[j_bar][0].replace(".1.0", "")
        d_delta = t_d - bars_tempo_time_data[j_bar][1]
        d_haku = bars_tempo_time_data[j_bar][4] / bars_tempo_time_data[j_bar][2]
        haku = int(d_delta // d_haku)
        remain = int(d_delta - (d_haku * haku))
        
        t_hyousi = f"{bar}.{haku+1}.{remain}"
        return t_zettai, t_hyousi


    # Extract notes data
    notes_data = []

    for _, row in df[df[2] == "Note_on_c"].iterrows():
        t_note_on = int(row[1])
        channel = int(row[3])
        pitch = int(row[4])

        for _, row2 in df[df[1] > t_note_on].iterrows():
            if int(row2[1]) <= t_note_on:
                continue
            elif row2[2] == "Note_off_c" and int(row2[3]) == channel and int(row2[4]) == pitch:
                t_note_off = int(row2[1])
                break

        t_zettai_note_on, hyousi_note_on = get_times_t_d(t_note_on)
        t_zettai_note_off, hyousi_note_off = get_times_t_d(t_note_off)

        # Define pitch display text
        pi_dict_normal = {0: "C", 1: "C#", 2: "D", 3: "Eb", 4: "E", 5: "F", 6: "F#", 7: "G", 8: "G#", 9: "A", 10: "Bb", 11: "B"}
        pi_dict_sharp = {0: "C", 1: "C#", 2: "D", 3: "D#", 4: "E", 5: "F", 6: "F#", 7: "G", 8: "G#", 9: "A", 10: "A#", 11: "B"}
        pi_dict_flat = {0: "C", 1: "Db", 2: "D", 3: "Eb", 4: "E", 5: "F", 6: "Gb", 7: "G", 8: "Ab", 9: "A", 10: "Bb", 11: "B"}
        oc = pitch // 12 - 1

        _chou = df_hyousi_and_tempo[df_hyousi_and_tempo["時間"] < t_note_on].iloc[-1]["調"]
        if _chou > 0:
            pi_dict = pi_dict_sharp
        elif _chou < 0:
            pi_dict = pi_dict_flat
        else:
            pi_dict = pi_dict_normal

        pi = pi_dict[pitch % 12]

        notes_data.append([t_note_on, t_note_off, hyousi_note_on, hyousi_note_off, t_zettai_note_on, t_zettai_note_off, oc, pi, pitch, channel])
    
    df_notes = pd.DataFrame(notes_data)
    channels = sorted(df_notes[9].unique()[:4])

    df_notes[9] = df_notes[9].apply(lambda c: channels.index(c))
    df_notes.to_csv(os.path.join(args.output_dir, "notes.csv"))


    # Extract text data
    text_data = []
    for _, row in df[df[2] == "Text_t"].iterrows():
        t = int(row[1])
        text = row[3]
        t_zettai, hyousi = get_times_t_d(t)
        text_data.append([t, t_zettai, get_time(t_zettai), hyousi, text])
    pd.DataFrame(text_data).to_csv(os.path.join(args.output_dir, "texts.csv"))

    # Extract marker data
    text_data = []
    for _, row in df[df[2] == "Marker_t"].iterrows():
        t = int(row[1])
        text = row[3]
        t_zettai, hyousi = get_times_t_d(t)
        text_data.append([t, t_zettai, get_time(t_zettai), hyousi, text])
    pd.DataFrame(text_data).to_csv(os.path.join(args.output_dir, "markers.csv"))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='MIDI to CSV utility based on py_midicsv library')
    parser.add_argument('-i', '--input', required=True, help="Input path")
    parser.add_argument('-o', '--output_dir', default="output", help="Output directory")
    args = parser.parse_args()
    try:
        main(args)
        print("Converted.")
    except Exception as e:
        print(f"Error has occered: {e}")

実行結果のサンプル

以下のようなCSVデータが得られます。

  • beats_and_tempo.csv:各小節の始めの位置、変更したテンポの位置などが格納されます。
,拍子位置,時間(秒),拍子分子,拍子分母,1小節長さ,テンポ,調,長調/短調,絶対時間(秒)
0,1.1.0,0,4,4,1920,175.0,-6,major,0.0
1,2.1.0,1920,4,4,1920,175.0,-6,major,1.3714285714285714
2,3.1.0,3840,4,4,1920,175.0,-6,major,2.742857142857143
3,4.1.0,5760,4,4,1920,175.0,-6,major,4.114285714285714
4,5.1.0,7680,4,4,1920,175.0,-6,major,5.485714285714286
5,6.1.0,9600,4,4,1920,175.0,-6,major,6.857142857142858
6,7.1.0,11520,4,4,1920,175.0,-6,major,8.22857142857143
7,8.1.0,13440,4,4,1920,175.0,-6,major,9.600000000000001
8,9.1.0,15360,4,4,1920,175.0,-6,major,10.971428571428573
  • markers.csv:小節単位のマーカー情報が格納されます。
,拍子位置,時間(秒),時間,拍子位置,マーカー
0,5760,4.114285714285714,0:04:11,4.1.0,I
1,13440,9.600000000000001,0:09:60,8.1.0,I-A1
2,28800,20.57142857142857,0:20:57,16.1.0,A1
3,59520,42.51428571428572,0:42:51,32.1.0,B1
4,74880,53.48571428571431,0:53:48,40.1.0,S1
5,117120,83.65714285714283,1:23:65,62.1.0,S1-A2
6,128640,91.88571428571423,1:31:88,68.1.0,A2
7,144000,102.85714285714276,1:42:85,76.1.0,B2
  • notes.csv:ノート情報が格納されます。
,Note ON拍子位置,Note OFF拍子位置,Note ON時間(秒),Note OFF時間(秒),オクターブ,音階,音高,チャンネル
0,5760,6235,4.1.0,4.1.475,4.114285714285714,4.453571428571428,5,Bb,82,0
1,6240,6478,4.2.0,4.2.238,4.457142857142856,4.627142857142857,5,Eb,75,0
2,6720,7195,4.3.0,4.3.475,4.8,5.139285714285714,5,Ab,80,0
3,7200,7438,4.4.0,4.4.238,5.142857142857142,5.312857142857142,5,Db,73,0
4,7440,7915,4.4.240,5.1.235,5.314285714285714,5.6535714285714285,5,Ab,80,0
5,7920,8158,5.1.240,5.1.478,5.6571428571428575,5.827142857142857,5,Db,73,0
  • texts.csv:コード等任意の位置に設定したテキスト情報が格納されます。
,拍子位置,時間(秒),時間,拍子位置,テキスト
0,5760,4.114285714285714,0:04:11,4.1.0,Cdim
1,6480,4.628571428571428,0:04:62,4.2.240,B
2,7440,5.314285714285714,0:05:31,4.4.240,Bbm
3,8400,6.0,0:06:00,5.2.240,Gbm/A
4,9600,6.857142857142858,0:06:85,6.1.0,Abm7
22
19
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
22
19