More than 5 years have passed since last update.

DSP.jlでspectrogramを計算する

Last updated at 2019-07-02Posted at 2019-07-02

メモ書き。
基本的にはリファレンスを見たほうが良い。
https://juliadsp.github.io/DSP.jl/stable/periodograms/

音の作成

CureMIDI.jlとMIDI.jlを使います。

以下のgenerateを使ってTrainingDataを生成します。
このTrainingData.soundに対してスペクトログラムを計算します。

"""
    TrainingData(track, sound)

One training sample: a MIDI track paired with the audio buffer synthesized from it.

The struct is parametrized on the concrete buffer type so that access to `sound` is
type-stable (`SampledSignals.SampleBuf` on its own is not a concrete type).

# Fields
- `track::MIDI.MIDITrack`: the MIDI track the sample was built from.
- `sound::S`: the sampled audio for `track`, a `SampledSignals.SampleBuf`.
"""
struct TrainingData{S<:SampledSignals.SampleBuf}
    track::MIDI.MIDITrack
    sound::S
end


"""
    generate(sf2_paths::Vector{String}, sample_num_per_sf2::Int)

Create a training dataset by synthesizing MIDI tracks with each given SoundFont.

For every path in `sf2_paths`, `sample_num_per_sf2` tracks are created with
`create_rand_midi_track` and rendered with `synth`. The global RNG is seeded with `0`
first.

# Returns
A `Vector{TrainingData}` holding `length(sf2_paths) * sample_num_per_sf2` samples,
grouped by SoundFont in the order of `sf2_paths`.
"""
function generate(sf2_paths::Vector{String}, sample_num_per_sf2::Int)
    # Reset the global RNG to a fixed seed before anything is generated.
    Random.seed!(0)

    dataset = TrainingData[]
    for soundfont in sf2_paths, _ in 1:sample_num_per_sf2
        midi_track = create_rand_midi_track()
        audio = synth(midi_track, TPQ, BPM, SAMPLE_RATE, soundfont)
        push!(dataset, TrainingData(midi_track, audio))
    end
    return dataset
end

"""
    create_rand_midi_track()

Build a MIDI track containing ten consecutive one-second notes.

Note `i` (for `i` in `1:10`) has pitch `69 + 2i` and velocity `70`, starts at
`1000 * (i - 1)` ms and lasts `1000` ms. Millisecond values are converted to ticks with
`CureMIDI.ms_to_tick` using the `TPQ` and `BPM` values defined elsewhere, and rounded up.

Despite the name, the track is deterministic: no random values are drawn here.

# Returns
A `MIDI.MIDITrack` with the ten notes added.
"""
function create_rand_midi_track()
    track = MIDI.MIDITrack()
    notes = MIDI.Notes()
    start_pitch = 69
    velocity = 70
    note_ms = 1000

    # Every note has the same length, so convert it to ticks only once.
    duration_tick = UInt(ceil(CureMIDI.ms_to_tick(note_ms, TPQ, BPM)))

    for i in 1:10
        pitch = start_pitch + 2 * i
        start_ms = note_ms * (i - 1)
        start_tick = UInt(ceil(CureMIDI.ms_to_tick(start_ms, TPQ, BPM)))

        # MIDI.Note arguments: pitch, velocity, position, duration
        push!(notes, MIDI.Note(pitch, velocity, start_tick, duration_tick))
    end

    addnotes!(track, notes)
    return track
end

spectrogramの計算

ステレオで音を生成するので、soundにはframe数 x channel数(2: left、right)の二次元配列として格納されている。
left


"""
    spectrogram(sample_buf::SampledSignals.SampleBuf)

Compute one spectrogram per channel of `sample_buf` using `DSP.spectrogram`.

The analysis window is one eighth note long (converted from ticks to audio frames with
`CureMIDI.tick_to_frame`), and the FFT length is twice the window length. The window is
clamped to the signal length so a very short buffer cannot request a window longer than
the signal.

# Arguments
- `sample_buf`: audio buffer whose `data` holds frames in rows and channels in columns.

# Returns
A vector with one spectrogram per channel, in channel order.
"""
function spectrogram(sample_buf::SampledSignals.SampleBuf)
    fs = SampledSignals.samplerate(sample_buf)

    # Length of one eighth note (half a quarter note), first in ticks, then in frames.
    # Both values are the same for every channel, so compute them once.
    ticks_per_eighth_note::UInt = ceil(TPQ / 2)
    frames_per_eighth_note = CureMIDI.tick_to_frame(
        ticks_per_eighth_note, TPQ, BPM, SAMPLE_RATE
    )

    # `map` yields a concretely typed vector instead of a `Vector{Any}`.
    return map(1:nchannels(sample_buf)) do ch
        input = sample_buf.data[:, ch]

        # `DSP.spectrogram` takes the window length in samples as its second argument,
        # so pass the frames in one eighth note (not the number of eighth notes).
        n = min(ceil(Int, frames_per_eighth_note), length(input))
        DSP.spectrogram(input, n; nfft=2n, fs=fs)
    end
end

# Compute the per-channel spectrograms of the first generated training sample.
spectrograms = spectrogram(first(training_dataset).sound)

# Pull the power values and the frequency / time axes out of the first channel's result.
p = power(first(spectrograms))
fs = freq(first(spectrograms))
ts = time(first(spectrograms))

計算結果

power

周波数の強さ。
STFTの結果の絶対値の二乗（パワー）の値が入る。

freq

周波数は量子化されている。
量子化された周波数の真ん中の値
周波数の上限はナイキスト周波数、つまり入力のサンプリング周波数の1/2。

time

時間も量子化されている。
量子化された時間の真ん中の値。
上の例だと単位は秒。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up