@t-yano-tryhard (矢野孝幸)posted at 2024-01-24

音声ファイルの周波数分解

Q&A

Closed

FFT

解決したいこと

周波数帯ごとの出力表示を正しくしたい

発生している問題・エラー

図.1

該当するソースコード

main.c

#include <stdio.h>
#include <stdint.h>
#include <sys/stat.h>
#include <stdbool.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

#define SAMPLE 1024

#define __FILENAME__ (__builtin_strrchr(__FILE__, '/') ? __builtin_strrchr(__FILE__, '/') + 1 : __FILE__)

#define d(s, ...)                                                                     \
    do                                                                                \
    {                                                                                 \
        printf("%s(%d) %s " s "\n", __FILENAME__, __LINE__, __func__, ##__VA_ARGS__); \
    } while (0)

#define ERRRET(c, s, ...)                                                                 \
    do                                                                                    \
    {                                                                                     \
        if (c)                                                                            \
        {                                                                                 \
            printf("%s(%d) %s " s "\n", __FILENAME__, __LINE__, __func__, ##__VA_ARGS__); \
            goto error_return;                                                            \
        }                                                                                 \
    } while (0)

typedef struct str_music
{
    uint32_t ttl_num;
    uint32_t sample_num;
    uint16_t data[0][SAMPLE];
} music_t;

typedef struct str_wav_data
{
    uint32_t id;
    uint32_t size;
    uint16_t data[];
} wav_data_t;

typedef struct str_fmt_chank
{
    uint32_t id;
    uint32_t size;
    uint16_t tag;
    uint16_t channel;
    uint32_t sample_rate;
    uint32_t byte_rate;
    uint16_t block_byte;
    uint16_t bit_per_sample;
    wav_data_t wav_data;
} fmt_chank_t;

typedef struct str_wav
{
    uint32_t id;
    uint32_t size;
    uint32_t format;
    fmt_chank_t chank;
} wav_t;

music_t *get_music_from_arg(int argc, char *argv[]);
bool is_monaural_16bit(wav_t *wav);
void free_music(music_t *music);
void dft(uint16_t *out_data, uint16_t *data, uint32_t num);

int main(int argc, char *argv[])
{
    music_t *music = NULL;
    uint16_t *output = NULL;
    char line[SAMPLE * 8];
    char *p;
    int i, j;

    music = get_music_from_arg(argc, argv);
    if (!music)
        goto error_return;

    // d("ttl:%x sample:%x", music->ttl_num, music->sample_num);
    output = malloc(sizeof(music->data[0]));

    for (i = 0; i < music->sample_num; i++)
    {
        dft(output, music->data[i], SAMPLE);
        p = line;
        for (j = 0; j < SAMPLE; j++)
        {
            p += sprintf(p, "%u,", output[j]);
        }
        *(p - 1) = '\n';
        printf("%s", line);
    }

error_return:
    free_music(music);
    if (output)
        free(output);
    return 0;
}

bool file_exists(char *path)
{
    struct stat st;
    int ret;

    ret = stat(path, &st);
    if (ret)
        return false;

    return S_ISREG(st.st_mode);
}

bool is_monaural_16bit(wav_t *wav)
{
#define PCM_TAG 1
#define BPS_16BIT 0x10
#define MONAURAL 1

    if (!wav)
        return false;

    if (memcmp((char *)&wav->format, "WAVE", 4))
        return false;
    if (memcmp((char *)&wav->chank.id, "fmt ", 4))
        return false;
    if (wav->chank.tag != PCM_TAG)
        return false;
    if (wav->chank.channel != MONAURAL)
        return false;
    if (wav->chank.bit_per_sample != BPS_16BIT)
        return false;

    return memcmp(&wav->chank.wav_data.id, "data", 4) == 0;

#undef MONAURAL
#undef PCM_TAG
#undef BPS_16BIT
}

music_t *get_music_from_arg(int argc, char *argv[])
{
    music_t *music = NULL;
    FILE *fp = NULL;
    uint8_t buf[0x40];
    int ret;
    wav_t *wav;
    uint32_t size;
    uint32_t raw_size;
    uint32_t sample_num;

    ERRRET(argc < 2, "missing arguments");
    ERRRET(!file_exists(argv[1]), "file(%s) not exists", argv[1]);

    fp = fopen(argv[1], "rb");
    ERRRET(!fp, "file open error.");

    ret = fread(buf, sizeof(buf), 1, fp);
    ERRRET(ret < 1, "file read error");

    wav = (wav_t *)buf;
    ERRRET(!is_monaural_16bit(wav), "not expected wav file.");

    size = wav->chank.wav_data.size;
    sample_num = size / (SAMPLE * 2) + ((size % (SAMPLE * 2)) ? 1 : 0);
    raw_size = size;
    size = sample_num * SAMPLE * 2;

    music = calloc(sizeof(*music) + size, 1);
    ERRRET(!music, "malloc failed.");

    fseek(fp, (uint64_t)wav->chank.wav_data.data - ((uint64_t)wav), SEEK_SET);

    ret = fread(music->data, raw_size, 1, fp);
    ERRRET(ret < 1 && !(music = NULL), "wav data read failed.");

    music->sample_num = sample_num;
    music->ttl_num = size;

error_return:
    if (fp)
        fclose(fp);
    return music;
}

void free_music(music_t *music)
{
    if (music)
    {
        free(music);
    }
}

void dft(uint16_t *out_data, uint16_t *data, uint32_t num)
{
    int i, j;
    double real;
    double theta;
    double ddata;

    for (i = 0; i < num; i++)
    {
        real = 0;
        for (j = 0; j < num; j++)
        {
            theta = 2.0 * M_PI * i * j / num;
            ddata = (double)data[j];

            // cancel offset
            ddata -= (65536 / 2);

            real += ddata * cos(theta);
        }

        // shrink range to uint16_t
        real /= SAMPLE;
        real += 65536;
        real /= 2;

        out_data[i] = (uint16_t)round(real);
    }
}

自分で試したこと

FFTの勉強のために音声ファイルの周波数分解を行おうと思いました。
上記main.cを以下のコマンドでコンパイルしました。

gcc -g -O0 -Wall -lm main.c -o sound_check

サンプルファイルを利用してCSVを出力しました。
https://www2.cs.uic.edu/~i101/SoundFiles/PinkPanther30.wav

./sound_check ~/Downloads/PinkPanther30.wav > freq.csv

Rを使用してグラフに出力しました。

Rscript freq.r freq.csv

freq.r

#library("animation")

argv <- commandArgs(trailingOnly = T)
argc <- length(argv)

if (argc < 1) {
    quit()
}

csv_file_path <- argv[1]

data <- read.csv(csv_file_path, header = FALSE)
row_num <- nrow(data)

X11()
replot <- function() {for(i in 1:row_num) {
(plot(1:1024, data[i,], type = "l", ylim = c(0, 65535),
     xlab="", ylab="", main = paste(i, "/", row_num)))

}}

replot()
#saveGIF(replot(), movie.name="freq.gif", interval=0.04)
graphics.off()

すると、図.1のような表示になってしまいました。

検証のためにpythonでもやってみたのですが
同様の表示となってしまうため、計算自体が間違っているわけではなさそうです。

freq.py ~/Downloads/PinkPanther30.wav > freq2.csv
Rscript freq.r freq2.csv

freq.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import sys
import struct
import numpy as np

argv = sys.argv


def main():
    if not validarg(argv):
        usage()
        sys.exit()

    f = open(argv[1], "rb")
    header = f.read(0x28)

    list = []
    while True:
        data = f.read(2)
        if data:
            d = struct.unpack('<H', data)[0]
            list.append(d)
        else:
            break
    f.close()

    for i, val in enumerate(list):
        list[i] = list[i] - (65536 / 2)
        
    num = len(list)
    
    sample_num = int(num / 1024) + (1 if (num % 1024) > 0 else 0)
    
    for i in range(sample_num):
        orig = []
        if i == (sample_num - 1) :
            orig = list[(i * 1024):]
        else:
            orig = list[(i * 1024):((i + 1) * 1024)]
        orig.extend([0] * (1024 - len(orig)))
        freq = np.fft.fft(orig)
        freq2 = []
        for v in freq:
            f = v.real
            f = f / 1024
            f = f + 65536
            f = f / 2
            freq2.append(f)
            
        print(*freq2, sep=',')


def usage():
    s = f'''
usage:
    {args[0]} [wav file name]
'''[1:-1]
    print(s)

def validarg(args):
    if len(args) < 2:
        return False
    return os.path.isfile(args[1])


if __name__ == '__main__':
    main()

下向きにでているのは虚数部を計算して絶対値をとれば良いと思うのですが
おかしな表示(異常な高周波?など)の原因を教えていただけませんでしょうか？

0 likes

Are you sure you want to delete the question?

音声ファイルの周波数分解

解決したいこと

発生している問題・エラー

該当するソースコード

自分で試したこと

1Answer

Comments

Your answer might help someone💌