DCTのソースコードが存在するがIDCTのソースコードが存在しないのでDCTを解析してIDCTを実装してみた。
DCTからIDCTの導出は、数学の問題のように式変形をする感じでプログラムをしていくとできました。三角関数のテーブルの中身とかは関係なく単純な式の変換だけでした。
すぐ忘れそうな内容なので過程をメモしときます。
coef = 0;
for (i = 0; i < nlnn - 1; ++i) {
loop1 = 1 << (nlnn - 2 - i);
loop2 = 1 << i;
index0 = 0;
index1 = 1 << (i + 1);
offset = 1 << (i + 2);
for (k = 0; k < loop2; ++k) {
cc = p_c[coef];
cs = p_s[coef++];
for (j = 0; j < loop1; ++j) {
a = p_work[index0 + 0];
b = p_work[index0 + 1];
c = p_work[index1 + 0] * cc + p_work[index1 + 1] * cs;
d = p_work[index1 + 0] * cs - p_work[index1 + 1] * cc;
p_work[index0 + 0] = a + c;
p_work[index0 + 1] = b + d;
p_work[index1 + 0] = a - c;
p_work[index1 + 1] = b - d;
index0 += offset;
index1 += offset;
}
index0 += 2 - nsmpl;
index1 += 2 - nsmpl;
}
}
というDCT処理があるのですが、逆変換のコードが無いので自力で解析して実装したかった。
nlnn : (1 << nlnn) が nsmpl
nsmpl : サンプル数
p_work : データ 入力と出力を兼用
p_c : cosテーブル
p_s : sinテーブル
サンプル数が4の場合を解析
サンプル数を小さくしDCT処理の動作を解析してみます。サンプル数 nsmpl=4にしてループを展開してみます。
{
cc = p_c[0];
cs = p_s[0];
a = p_work[0];
b = p_work[1];
c = p_work[2] * cc + p_work[3] * cs;
d = p_work[2] * cs - p_work[3] * cc;
p_work[0] = a + c;
p_work[1] = b + d;
p_work[2] = a - c;
p_work[3] = b - d;
}
となります。これなら逆変換を導出できそうです。
cc = p_c[0];
cs = p_s[0];
a = (p_work[0] + p_work[2]) / 2;
b = (p_work[1] + p_work[3]) / 2;
c = (p_work[0] - p_work[2]) / 2;
d = (p_work[1] - p_work[3]) / 2;
p_work[0] = a;
p_work[1] = b;
p_work[2] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[3] = (c * cs - d * cc) / (cc * cc + cs * cs);
とすると元の値を取り出せました。
サンプル数が8の場合を解析
サンプル数を8にして再度ループを展開してみます。
cc = p_c[0];
cs = p_s[0];
a = p_work[0];
b = p_work[1];
c = p_work[2] * cc + p_work[3] * cs;
d = p_work[2] * cs - p_work[3] * cc;
p_work[0] = a + c;
p_work[1] = b + d;
p_work[2] = a - c;
p_work[3] = b - d;
a = p_work[4];
b = p_work[5];
c = p_work[6] * cc + p_work[7] * cs;
d = p_work[6] * cs - p_work[7] * cc;
p_work[4] = a + c;
p_work[5] = b + d;
p_work[6] = a - c;
p_work[7] = b - d;
cc = p_c[1];
cs = p_s[1];
a = p_work[0];
b = p_work[1];
c = p_work[4] * cc + p_work[5] * cs;
d = p_work[4] * cs - p_work[5] * cc;
p_work[0] = a + c;
p_work[1] = b + d;
p_work[4] = a - c;
p_work[5] = b - d;
cc = p_c[2];
cs = p_s[2];
a = p_work[2];
b = p_work[3];
c = p_work[6] * cc + p_work[7] * cs;
d = p_work[6] * cs - p_work[7] * cc;
p_work[2] = a + c;
p_work[3] = b + d;
p_work[6] = a - c;
p_work[7] = b - d;
となりました。変換が4回行われています。1回の変換はサンプル数が4の時の変換と同じなので、同様に逆変換を導出できます。
cc = p_c[2];
cs = p_s[2];
a = (p_work[2] + p_work[6]) / 2;
b = (p_work[3] + p_work[7]) / 2;
c = (p_work[2] - p_work[6]) / 2;
d = (p_work[3] - p_work[7]) / 2;
p_work[2] = a;
p_work[3] = b;
p_work[6] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[7] = (c * cs - d * cc) / (cc * cc + cs * cs);
cc = p_c[1];
cs = p_s[1];
a = (p_work[0] + p_work[4]) / 2;
b = (p_work[1] + p_work[5]) / 2;
c = (p_work[0] - p_work[4]) / 2;
d = (p_work[1] - p_work[5]) / 2;
p_work[0] = a;
p_work[1] = b;
p_work[4] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[5] = (c * cs - d * cc) / (cc * cc + cs * cs);
cc = p_c[0];
cs = p_s[0];
a = (p_work[4] + p_work[6]) / 2;
b = (p_work[5] + p_work[7]) / 2;
c = (p_work[4] - p_work[6]) / 2;
d = (p_work[5] - p_work[7]) / 2;
p_work[4] = a;
p_work[5] = b;
p_work[6] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[7] = (c * cs - d * cc) / (cc * cc + cs * cs);
cc = p_c[0];
cs = p_s[0];
a = (p_work[0] + p_work[2]) / 2;
b = (p_work[1] + p_work[3]) / 2;
c = (p_work[0] - p_work[2]) / 2;
d = (p_work[1] - p_work[3]) / 2;
p_work[0] = a;
p_work[1] = b;
p_work[2] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[3] = (c * cs - d * cc) / (cc * cc + cs * cs);
という式で元の値が得られました。
IDCTをループ化
DCTと同じくIDCTも三重ループになるので、展開されている逆変換の式をちょっとずつループにしていきます。
サンプル数4の場合の逆変換を配列のインデックスを変数を使うようにします。
coef = 0;
i = 0;
index0 = 0;
index1 = 1 << (i + 1);
{
cc = p_c[coef];
cs = p_s[coef];
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
}
サンプル数8の場合の逆変換もインデックスを変数にする。
coef = 2;
index0 = 2;
index1 = 6;
{
cc = p_c[coef];
cs = p_s[coef--];
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
}
index0 = 0;
index1 = 4;
{
cc = p_c[coef];
cs = p_s[coef--];
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
}
cc = p_c[coef];
cs = p_s[coef--];
index0 = 4;
index1 = 6;
{
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
}
index0 = 0;
index1 = 2;
{
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
}
3番目と4番目の逆変換をループに置き換える。
coef = 2;
index0 = 2;
index1 = 6;
{
cc = p_c[coef];
cs = p_s[coef--];
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
}
index0 = 0;
index1 = 4;
{
cc = p_c[coef];
cs = p_s[coef--];
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
}
loop1 = 2;
cc = p_c[coef];
cs = p_s[coef--];
offset = 4
index0 = 4;
index1 = 6;
for (j = 0; j < loop1; ++j) {
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
index0 -= offset;
index1 -= offset;
}
次に1番目と2番目の逆変換をループに置き換える。そのループを3、4番目の逆変換にも適用。
coef = 2;
loop1 = 1;
loop2 = 2;
offset = 8;
index0 = 2;
index1 = 6;
for (k = 0; k < loop2; ++k) {
cc = p_c[coef];
cs = p_s[coef--];
for (j = 0; j < loop1; ++j) {
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
index0 -= offset;
index1 -= offset;
}
index0 -= 2 + nsmpl;
index1 -= 2 + nsmpl;
}
loop1 = 2;
loop2 = 1;
offset = 4
index0 = 4
index1 = 6
for (k = 0; k < loop2; ++k) {
cc = p_c[coef];
cs = p_s[coef--];
for (j = 0; j < loop1; ++j) {
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
index0 -= offset;
index1 -= offset;
}
index0 -= 2 + nsmpl;
index1 -= 2 + nsmpl;
}
外側のループにまとめる。
外側ループの初期値はDCTの4番目の変換を実行する直前の値になるように頑張って調整する。
coef = 2;
for (i = nlnn - 2; i >= 0; i--) {
loop1 = 1 << (nlnn - 2 - i);
loop2 = 1 << i;
offset = 1 << (i + 2);
index0 = offset * (loop1 * loop2 - 1) + (2 - nsmpl) * (loop2 - 1);
index1 = nsmpl - 2;
for (k = 0; k < loop2; ++k) {
cc = p_c[coef];
cs = p_s[coef--];
for (j = 0; j < loop1; ++j) {
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
index0 -= offset;
index1 -= offset;
}
index0 -= 2 + nsmpl;
index1 -= 2 + nsmpl;
}
}
ループ外のcoefの初期値はサンプル数を変えて値の変化を観察する。
4 -> 0
8 -> 2
16 -> 6
32 -> 14
...
128 -> 62
となるので、(サンプル数/2) -2みたいでした。
coef = (nsmpl >> 1) - 2;
for (i = nlnn - 2; i >= 0; --i) {
loop1 = 1 << (nlnn - 2 - i);
loop2 = 1 << i;
offset = 1 << (i + 2);
index0 = offset * (loop1 * loop2 - 1) + (2 - nsmpl) * (loop2 - 1);
index1 = nsmpl - 2;
for (k = 0; k < loop2; ++k) {
cc = p_c[coef];
cs = p_s[coef--];
for (j = 0; j < loop1; ++j) {
a = (p_work[index0 + 0] + p_work[index1 + 0]) / 2;
b = (p_work[index0 + 1] + p_work[index1 + 1]) / 2;
c = (p_work[index0 + 0] - p_work[index1 + 0]) / 2;
d = (p_work[index0 + 1] - p_work[index1 + 1]) / 2;
p_work[index0 + 0] = a;
p_work[index0 + 1] = b;
p_work[index1 + 0] = (c * cc + d * cs) / (cc * cc + cs * cs);
p_work[index1 + 1] = (c * cs - d * cc) / (cc * cc + cs * cs);
index0 -= offset;
index1 -= offset;
}
index0 -= 2 + nsmpl;
index1 -= 2 + nsmpl;
}
}
ということでDCTからIDCTができました。パチパチ。