LoginSignup
4
2

More than 3 years have passed since last update.

Waveform Audio File Format(WAV)を理解する

Last updated at Posted at 2019-05-27

はじめに

Waveform Audio File Format(WAV)について説明します。
Pulse-code modulation(PCM) を保存できます。次のDocumentに定義されています。

Multimedia Programming Interface and Data Specifications [1]

Format

Header

header を C の struct で表記します。
FormType = "WAVE" の RIFFです。ID = "fmt " と "data" の chunkを持ちます。

/* Values stored in the fields of struct wav_header. */
#define LPCM (1)            /* FormatTag: linear PCM */
#define MONO (1)            /* Channels: one channel */
#define STEREO (2)          /* Channels: two channels */
#define RATE48KHZ (48000)   /* SamplesPerSec: 48 kHz */
#define BPS16BIT (16)       /* BitsPerSample: 16 bit */

/*
 * 44-byte header of a RIFF file with FormType "WAVE" that holds exactly one
 * "fmt " chunk immediately followed by one "data" chunk.
 * The struct is packed so that its in-memory layout has no padding.
 */
struct wav_header {
  /* RIFF chunk descriptor */
  uint8_t  ChunkID[4];         /* "RIFF" */
  uint32_t ChunkSize;          /* 4 + (8 + SubChunkFmtSize(16)) + (8 + SubChunkDataSize) */
  uint8_t  FormType[4];        /* "WAVE" */

  /* "fmt " sub-chunk */
  uint8_t  SubChunkFmtID[4];   /* "fmt " */
  uint32_t SubChunkFmtSize;    /* 16 */
  uint16_t FormatTag;          /* 1: LPCM */
  uint16_t Channels;           /* MONO or STEREO */
  uint32_t SamplesPerSec;      /* e.g. RATE48KHZ */
  uint32_t AvgBytesPerSec;     /* SamplesPerSec * Channels * BitsPerSample / 8 */
  uint16_t BlockAlign;         /* Channels * BitsPerSample / 8 */
  uint16_t BitsPerSample;      /* 16 */

  /* "data" sub-chunk header; the PCM samples follow it */
  uint8_t  SubChunkDataID[4];  /* "data" (exactly 4 bytes, no trailing space) */
  uint32_t SubChunkDataSize;   /* NumSamples * Channels * BitsPerSample / 8 */
} __attribute__((packed));

Data

headerの後ろにPCMデータが続きます。1 Sample分のデータをChannel順に並べたもの(例: stereoなら左、右の順)が、Sample数分 連続して続きます。
[1]から図を抜粋します。

1.JPG

Code example

sine の wav を生成する例を示します。mono / 48kHz です。

makefile
# Builds the single-file example program "test" from test.c.
CFLAGS=-I. -Wall -Werror -O2 -march=native
INCS=
# The source file is handed to the link rule directly, so compiling and
# linking happen in one compiler invocation.
OBJS=test.c
# sin() and round() live in libm on platforms where it is separate from libc.
LIBS=-lm
TARGET=test

# "all" and "clean" are commands, not files.
.PHONY: all clean

all: $(TARGET)

%.o: %.c $(INCS)
	$(CC) $(CFLAGS) -c -o $@ $<

$(TARGET): $(OBJS)
	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)

clean:
	rm -rf $(TARGET) *.o
test.c
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* Values stored in the fields of struct wav_header. */
#define LPCM (1)            /* FormatTag: linear PCM */
#define MONO (1)            /* Channels: one channel */
#define STEREO (2)          /* Channels: two channels */
#define RATE48KHZ (48000)   /* SamplesPerSec: 48 kHz */
#define BPS16BIT (16)       /* BitsPerSample: 16 bit */

/*
 * 44-byte header of a RIFF file with FormType "WAVE" that holds exactly one
 * "fmt " chunk immediately followed by one "data" chunk.
 * The struct is packed so that its in-memory layout has no padding.
 */
struct wav_header {
  /* RIFF chunk descriptor */
  uint8_t  ChunkID[4];         /* "RIFF" */
  uint32_t ChunkSize;          /* 4 + (8 + SubChunkFmtSize(16)) + (8 + SubChunkDataSize) */
  uint8_t  FormType[4];        /* "WAVE" */

  /* "fmt " sub-chunk */
  uint8_t  SubChunkFmtID[4];   /* "fmt " */
  uint32_t SubChunkFmtSize;    /* 16 */
  uint16_t FormatTag;          /* 1: LPCM */
  uint16_t Channels;           /* MONO or STEREO */
  uint32_t SamplesPerSec;      /* e.g. RATE48KHZ */
  uint32_t AvgBytesPerSec;     /* SamplesPerSec * Channels * BitsPerSample / 8 */
  uint16_t BlockAlign;         /* Channels * BitsPerSample / 8 */
  uint16_t BitsPerSample;      /* 16 */

  /* "data" sub-chunk header; the PCM samples follow it */
  uint8_t  SubChunkDataID[4];  /* "data" (exactly 4 bytes, no trailing space) */
  uint32_t SubChunkDataSize;   /* NumSamples * Channels * BitsPerSample / 8 */
} __attribute__((packed));

struct wav_header wav_template = {
  "RIFF",                    // uint8_t ChunkID[4];
  0,                         // uint32_t ChunkSize;
  "WAVE",                    // uint8_t FormType[4];
  "fmt ",                    // uint8_t SubChunkFmtID[4];
  16,                        // uint32_t SubChunkFmtSize;
  LPCM,                      // uint16_t FormatTag;
  MONO,                      // uint16_t Channels;
  RATE48KHZ,                 // uint32_t SamplesPerSec;
  RATE48KHZ*MONO*BPS16BIT/8, // uint32_t AvgBytesPerSec;
  MONO*BPS16BIT/8,           // uint16_t BlockAlign;
  BPS16BIT,                  // uint16_t BitsPerSample;
  "data",                    // uint8_t SubChunkDataID[4];
  0,                         // uint32_t SubChunkDataSize;
};

double chirp(int x, int N, double sf /*sampling_frequency*/, double fs, double fe, double A /*amplitude*/)
{
  double k = (fe - fs) / (double)N;
  return A * sin(2.0 * M_PI / sf * (fs*(double)x + k/2.0 * (double)x * (double)x));
}

/*
 * Rounds v to the nearest integer and saturates it to the int16_t range.
 * Converting an out-of-range or NaN double to int16_t is undefined behaviour,
 * so both cases are handled explicitly (NaN becomes 0).
 */
static int16_t to_pcm16(double v)
{
  double r = round(v);

  if (isnan(r)) {
    return 0;
  }
  if (r > (double)INT16_MAX) {
    return INT16_MAX;
  }
  if (r < (double)INT16_MIN) {
    return INT16_MIN;
  }
  return (int16_t)r;
}

/*
 * Generates N chirp samples at RATE48KHZ and writes them to fp as 16-bit
 * values, one fwrite per sample.
 *
 *   fp : output stream, already positioned after the header
 *   N  : number of samples; (write size) = N * sizeof(int16_t)
 *   fs : start frequency
 *   fe : end frequency
 *   A  : amplitude; values beyond the int16_t range are clipped
 *
 * On a write failure a message is printed and the function returns early;
 * the stream's error indicator tells the caller that this happened.
 *
 * NOTE: samples are written in host byte order, so the result is a valid
 * (little-endian) WAV only on little-endian hosts.
 */
void create_and_write_data(FILE* fp, int N, double fs /* start frequency*/, double fe /* end frequency*/, double A /*amplitude*/)
{
  int i;

  for (i = 0; i < N; i++) {
    int16_t s = to_pcm16(chirp(i, N, (double)RATE48KHZ, fs, fe, A));

    if (fwrite(&s, 1, sizeof(s), fp) != sizeof(s)) {
      printf("error fwrite %d\n", __LINE__);
      return;
    }
  }
}

/* Prints the command-line synopsis of the generator to stdout. */
void usage()
{
  fputs("usage:\n"
        " test [fs] [fe] [A] [N]\n", stdout);
}

/*
 * Writes a mono / 48 kHz / 16-bit chirp to "test.wav".
 *
 * Arguments: fs (start frequency), fe (end frequency), A (amplitude),
 * N (number of samples), all parsed with atoi().
 * Returns 0 on success and -1 on a usage, range or I/O error.
 */
int main(int argc, char* argv[])
{
  int fs, fe, A, N;
  int rc = -1;
  FILE* fp;

  if (argc != 5) {
    usage();
    return -1;
  }

  fs = atoi(argv[1]);
  fe = atoi(argv[2]);
  A = atoi(argv[3]);
  N = atoi(argv[4]);

  printf("fs=%d fe=%d A=%d N=%d\n", fs, fe, A, N);

  // The header stores sizes as uint32_t: reject a negative N and any N whose
  // data size plus the 36 remaining header bytes would not fit.
  if (N < 0 || (uintmax_t)N > (UINT32_MAX - 36u) / sizeof(int16_t)) {
    printf("error N out of range\n");
    return -1;
  }

  fp = fopen("test.wav", "wb");
  if (fp == NULL) {
    printf("error fopen\n");
    return -1;
  }

  wav_template.SubChunkDataSize = (uint32_t)N * (uint32_t)sizeof(int16_t);
  // 36 = 4 ("WAVE") + (8 + 16) ("fmt " chunk) + 8 ("data" chunk header)
  wav_template.ChunkSize = 36 + wav_template.SubChunkDataSize;

  // header
  if (fwrite(&wav_template, 1, sizeof(wav_template), fp) != sizeof(wav_template)) {
    printf("error fwrite %d\n", __LINE__);
    goto out;
  }

  // data; the helper returns nothing, so a failed write is detected through
  // the stream's error indicator
  create_and_write_data(fp, N, (double)fs, (double)fe, (double)A);
  if (ferror(fp)) {
    goto out;
  }

  rc = 0;

out:
  // fclose flushes buffered data, so its result decides success as well
  if (fclose(fp) != 0) {
    printf("error fclose\n");
    rc = -1;
  }
  return rc;
}
console
$ gcc --version
gcc.exe (Rev2, Built by MSYS2 project) 6.2.0
$ make clean && make
rm -rf test *.o
cc -I. -Wall -Werror -O2 -march=native -o test test.c
$ ./test.exe
usage:
 test [fs] [fe] [A] [N]
$ ./test.exe 20 20000 30000 480000

fsは始めの周波数です。feは終わりの周波数です。Aは振幅です。Nはデータ数です。
fs = feであれば単一のsineを生成します。 fs < feであれば周波数を変化させます。

WAV Fileの内容を出力する

WAV Fileを読み込み、headerの情報と各sampleの値を標準出力に表示する例を示します。

test.c
#include <inttypes.h>
#include <stdio.h>

/* Values stored in the fields of struct wav_header. */
#define LPCM (1)            /* FormatTag: linear PCM */
#define MONO (1)            /* Channels: one channel */
#define STEREO (2)          /* Channels: two channels */
#define RATE48KHZ (48000)   /* SamplesPerSec: 48 kHz */
#define BPS16BIT (16)       /* BitsPerSample: 16 bit */

/*
 * 44-byte header of a RIFF file with FormType "WAVE" that holds exactly one
 * "fmt " chunk immediately followed by one "data" chunk.
 * The struct is packed so that its in-memory layout has no padding.
 */
struct wav_header {
  /* RIFF chunk descriptor */
  uint8_t  ChunkID[4];         /* "RIFF" */
  uint32_t ChunkSize;          /* 4 + (8 + SubChunkFmtSize(16)) + (8 + SubChunkDataSize) */
  uint8_t  FormType[4];        /* "WAVE" */

  /* "fmt " sub-chunk */
  uint8_t  SubChunkFmtID[4];   /* "fmt " */
  uint32_t SubChunkFmtSize;    /* 16 */
  uint16_t FormatTag;          /* 1: LPCM */
  uint16_t Channels;           /* MONO or STEREO */
  uint32_t SamplesPerSec;      /* e.g. RATE48KHZ */
  uint32_t AvgBytesPerSec;     /* SamplesPerSec * Channels * BitsPerSample / 8 */
  uint16_t BlockAlign;         /* Channels * BitsPerSample / 8 */
  uint16_t BitsPerSample;      /* 16 */

  /* "data" sub-chunk header; the PCM samples follow it */
  uint8_t  SubChunkDataID[4];  /* "data" (exactly 4 bytes, no trailing space) */
  uint32_t SubChunkDataSize;   /* NumSamples * Channels * BitsPerSample / 8 */
} __attribute__((packed));

/* Prints the command-line synopsis of the dump tool to stdout. */
void usage()
{
  fputs("usage:\n", stdout);
  fputs(" test [file_name]\n", stdout);
}

int main(int argc, char* argv[])
{
  FILE* fp;
  char* file_name;
  struct wav_header hd;

  if (argc != 2) {
    usage();
    return -1;
  }

  file_name = argv[1];

  fp = fopen(file_name, "rb");
  if (fp == NULL) {
    printf("error fopen\n");
    return -1;
  }

  // read header
  if (fread(&hd, 1, sizeof(hd), fp) != sizeof(hd)) {
    printf("error fread header\n");
    return -1;
  }

  printf("FormatTag     %d\n", hd.FormatTag);
  printf("Channels      %d\n", hd.Channels);
  printf("SamplesPerSec %d\n", hd.SamplesPerSec);
  printf("BitsPerSample %d\n", hd.BitsPerSample);

  while (!feof(fp)) {
    int i;
    for (i=0; i<hd.Channels; i++) {
      int16_t sample = 0;
      fread(&sample, 1, 2, fp);
      printf("%d %+05d\n", i, sample);
    }
  }

  fclose(fp);
  return 0;
}
console
$ ./test.exe ./test.wav | head
FormatTag     1
Channels      1
SamplesPerSec 48000
BitsPerSample 16
0 +0000
0 +0079
0 +0157
0 +0236
0 +0315
0 +0395
...

References

4
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
4
2