0
0

More than 3 years have passed since last update.

ZLIB/GZIP DEFLATE 圧縮データの解凍処理は難しくないので作ってみる(C++03版)

Last updated at Posted at 2021-04-12

Python版 を参考に C++03仕様(?)で、ざっくりと作ってみる。


使用したコンパイラ

Apple clang version 12.0.0 (clang-1200.0.32.29)
Target: x86_64-apple-darwin20.3.0

解凍プログラム (短くないので折りたたみ)
main.cpp
#include <cassert>
#include <cstdio>
#include <cstdint>

#include <exception>
#include <string>
#include <vector>

typedef std::vector<uint8_t> byte_array;
typedef std::vector<uint16_t> ushort_array;

/////////////////////////////////////////////////////////////////////
//
// 定数テーブル用マクロ
//   const type table[256] = { CONST_TABLE_256(func, 0) };
//
/////////////////////////////////////////////////////////////////////

#define CONST_TABLE_1(func, n)  func(n)
#define CONST_TABLE_2(func, n)  CONST_TABLE_1(func, (n)), CONST_TABLE_1(func, (n)+1)
#define CONST_TABLE_4(func, n)  CONST_TABLE_2(func, (n)), CONST_TABLE_2(func, (n)+2)
#define CONST_TABLE_8(func, n)  CONST_TABLE_4(func, (n)), CONST_TABLE_4(func, (n)+4)
#define CONST_TABLE_16(func, n)  CONST_TABLE_8(func, (n)), CONST_TABLE_8(func, (n)+8)
#define CONST_TABLE_32(func, n)  CONST_TABLE_16(func, (n)), CONST_TABLE_16(func, (n)+16)
#define CONST_TABLE_64(func, n)  CONST_TABLE_32(func, (n)), CONST_TABLE_32(func, (n)+32)
#define CONST_TABLE_128(func, n)  CONST_TABLE_64(func, (n)), CONST_TABLE_64(func, (n)+64)
#define CONST_TABLE_256(func, n)  CONST_TABLE_128(func, (n)), CONST_TABLE_128(func, (n)+128)

/////////////////////////////////////////////////////////////////////
//
// コマンド ライン オプションなど
//
/////////////////////////////////////////////////////////////////////

static bool flag_debug = false;
static bool flag_verbose = false;

/*
 * メッセージ
 */

#define DEBUG_MESSAGE(...) { if (flag_debug) printf(__VA_ARGS__); }
#define ERROR_MESSAGE(...) printf(__VA_ARGS__)
#define VERBOSE(...) { if (flag_verbose) printf(__VA_ARGS__); }

template <typename T>
void verbose_list(const T &list, const char *prefix, int indent, size_t width = 16)
{
    printf("%s:\n", prefix);

    std::string lpfx(indent, ' ');
    uint_fast32_t vmax = 0;

    for (typename T::const_iterator it = list.begin(); it != list.end(); ++it)
        vmax = (vmax > *it) ? vmax : *it;

    char fmt[8];
    fmt[0] = '%';
    fmt[1] = 's';
    fmt[2] = '%';
    fmt[3] = '0' + ((vmax < 10) ? 1 : ((vmax < 100) ? 2 : 3));
    fmt[4] = 'd';
    fmt[5] = 0;

    for (size_t s = 0; s < list.size(); s += width)
    {
        printf("%s%04zx: ", lpfx.c_str(), s);

        const char *sep = "";
        size_t end = std::min(s+width, list.size());

        for (size_t t = s; t < end; ++t)
        {
            printf(fmt, sep, list[t]);
            sep = "  ";
        }
        printf("\n");
    }
}

/////////////////////////////////////////////////////////////////////
//
// エラー / 例外
//
//   enum inflate_error_code;
//   class inflate_error;
//
/////////////////////////////////////////////////////////////////////

/*
 * 解凍処理のエラーコード
 */
enum inflate_error_code
{
    inflate_success, // 成功

    inflate_error_input,  // 入力エラー
    inflate_error_output, // 出力エラー

    inflate_error_header,   // ヘッダ部の異常
    inflate_error_checksum, // チェック・サム異常
    inflate_error_size,     // 解凍サイズ異常

    inflate_error_block_type, // BTYPE 異常

    // BTYPE=00
    inflate_error_btype00_len,  // LEN と NLEN の不一致

    // BTYPE=10
    inflate_error_dictionary_code_length,      // コード辞書の不整合
    inflate_error_dictionary_decode_length,    // 文字/距離の符号長復号に失敗
    inflate_error_dictionary_litlen_length,    // 文字辞書の不整合
    inflate_error_dictionary_distance_length,  // 距離辞書の不整合

    // BTYPE=01,10
    inflate_error_decode_litlen,   // 文字(長)復号に失敗
    inflate_error_litlen_value,     // 文字(長)が異常
    inflate_error_decode_distance, // 距離復号に失敗
    inflate_error_distance_value,   // 距離が異常

    // バグ
    inflate_error_bug,
};

/*
 * エラー時の例外
 */
class inflate_error : public std::exception
{
public:
    inflate_error_code code;

    inflate_error(inflate_error_code code);
    virtual const char *what() const _NOEXCEPT;

    static const char *name(inflate_error_code code);
};

// inflate_error::メンバー

inline inflate_error::inflate_error(inflate_error_code value)
    : code(value)
{ /*NOOP*/ }

const char *inflate_error::what() const _NOEXCEPT
{
    return name(code);
}

const char *inflate_error::name(inflate_error_code code)
{
    switch (code)
    {
    case inflate_success:
        return "success";
    case inflate_error_input:
        return "input";
    case inflate_error_output:
        return "output";
    case inflate_error_header:
        return "header";
    case inflate_error_checksum:
        return "checksum";
    case inflate_error_size:
        return "size";
    case inflate_error_block_type:
        return "block_type";
    case inflate_error_btype00_len:
        return "btype00_len";
    case inflate_error_dictionary_code_length:
        return "dictionary_code_length";
    case inflate_error_dictionary_decode_length:
        return "dictionary_decode_length";
    case inflate_error_dictionary_litlen_length:
        return "dictionary_litlen_length";
    case inflate_error_dictionary_distance_length:
        return "dictionary_distance_length";
    case inflate_error_decode_litlen:
        return "decode_litlen";
    case inflate_error_litlen_value:
        return "litlen_value";
    case inflate_error_decode_distance:
        return "decode_distance";
    case inflate_error_distance_value:
        return "distance_value";
    case inflate_error_bug:
        return "bug";
    }
}

/////////////////////////////////////////////////////////////////////
//
// ビット列反転
//
//   uint_fast8_t inverse_uint8(uint_fast8_t value);
//   uint_fast16_t inverse_uint16(uint_fast16_t value);
//   uint_fast16_t inverse_bits(uint_fast16_t value, uint_fast8_t bits);
//
/////////////////////////////////////////////////////////////////////

template <uint8_t x>
struct const_inverse_uint8
{
    enum constant
    {
        b0 = (x & 0x80) ? 0x01 : 0,
        b1 = (x & 0x40) ? 0x02 : 0,
        b2 = (x & 0x20) ? 0x04 : 0,
        b3 = (x & 0x10) ? 0x08 : 0,
        b4 = (x & 0x08) ? 0x10 : 0,
        b5 = (x & 0x04) ? 0x20 : 0,
        b6 = (x & 0x02) ? 0x40 : 0,
        b7 = (x & 0x01) ? 0x80 : 0,
        value = b0 | b1 | b2 | b3 | b4 | b5 | b6 | b7,
    };
};

static const uint8_t inverse_uint8_table[256] = {
#define CONST_INVERSE_UINT8(n) const_inverse_uint8<(n)>::value
    CONST_TABLE_256(CONST_INVERSE_UINT8, 0)
#undef  CONST_INVERSE_UINT8
};

inline uint_fast8_t inverse_uint8(uint_fast8_t value)
{
    return inverse_uint8_table[uint8_t(value)];
}

inline uint_fast16_t inverse_uint16(uint_fast16_t value)
{
    return ((inverse_uint8(uint_fast8_t(value >> 0)) << 8) |
            (inverse_uint8(uint_fast8_t(value >> 8)) << 0));
}

inline uint_fast16_t inverse_bits(uint_fast16_t value, uint_fast8_t bits)
{
    return (inverse_uint16(value) >> (16 - bits));
}

/////////////////////////////////////////////////////////////////////
//
// CRC32
//
//   class crc32;
//
/////////////////////////////////////////////////////////////////////

class crc32
{
protected:
    uint32_t value;

public:
    crc32();
    crc32(const void *data, size_t size);

    operator uint_fast32_t() const;
    uint_fast32_t get_value() const;

    uint_fast8_t update(uint_fast8_t data);
    void update(const void *data, size_t size);

    static uint_fast32_t update(uint_fast32_t value, uint_fast8_t data);
    static uint_fast32_t update(uint_fast32_t value, const void *data, size_t size);

protected:
    template <uint8_t x>
    struct table_value
    {
        enum constant
        {
            m = 0x7fffffff,
            p = 0xedb88320,
            c0 = x,
            c1 = ((c0 >> 1) & m) ^ ((c0 & 1) ? p : 0),
            c2 = ((c1 >> 1) & m) ^ ((c1 & 1) ? p : 0),
            c3 = ((c2 >> 1) & m) ^ ((c2 & 1) ? p : 0),
            c4 = ((c3 >> 1) & m) ^ ((c3 & 1) ? p : 0),
            c5 = ((c4 >> 1) & m) ^ ((c4 & 1) ? p : 0),
            c6 = ((c5 >> 1) & m) ^ ((c5 & 1) ? p : 0),
            c7 = ((c6 >> 1) & m) ^ ((c6 & 1) ? p : 0),
            c8 = ((c7 >> 1) & m) ^ ((c7 & 1) ? p : 0),
            value = c8,
        };
    };
    static const uint32_t table[256];
};

// crc32::メンバー

const uint32_t crc32::table[256] = {
#define CONST_CRC32_VALUE(n) table_value<(n)>::value
    CONST_TABLE_256(CONST_CRC32_VALUE, 0)
#undef  CONST_CRC32_VALUE
};

inline uint_fast32_t crc32::update(uint_fast32_t value, uint_fast8_t data)
{
    return ((value >> 8) ^ table[uint8_t(value ^ data)]);
}

uint_fast32_t crc32::update(uint_fast32_t value, const void *data, size_t size)
{
    const uint8_t *p = reinterpret_cast<const uint8_t *>(data);

    while (size-- > 0)
        value = update(value, *p++);
    return value;
}

inline crc32::crc32()
    : value(~0)
{ /*NOOP*/ }

inline crc32::crc32(const void *data, size_t size)
    : value(update(~0, data, size))
{ /*NOOP*/ }

inline uint8_t crc32::update(uint_fast8_t data)
{
    value = update(value, data);
    return data;
}

inline void crc32::update(const void *data, size_t size)
{
    value = update(value, data, size);
}

inline crc32::operator uint_fast32_t() const
{
    return get_value();
}

inline uint_fast32_t crc32::get_value() const
{
    return uint32_t(~value);
}

/////////////////////////////////////////////////////////////////////
//
// ビット単位の読み込み用クラス
//
// 必須メソッド:
//     uint_fast16_t read(uint_fast8_t bit_width);
//     void align();
//
/////////////////////////////////////////////////////////////////////

/*
 * ビット単位の管理用クラス
 */
class istream_bit_buffer
{
protected:
    uint32_t data;
    uint8_t size;

public:
    istream_bit_buffer();
    bool check(uint_fast8_t width);
    void push(uint_fast8_t byte_data);
    uint_fast16_t pop(uint_fast8_t width);
    void align();
};

// istream_bit_buffer::メンバー

inline istream_bit_buffer::istream_bit_buffer()
    : data(0)
    , size(0)
{ /*NOOP*/ }

inline bool istream_bit_buffer::check(uint_fast8_t width)
{
    return (size >= width);
}

inline void istream_bit_buffer::push(uint_fast8_t byte_data)
{
    data |= uint8_t(byte_data) << size;
    size += 8;
    assert(size <= 32);
}

inline uint_fast16_t istream_bit_buffer::pop(uint_fast8_t width)
{
    assert(size >= width && width <= 16);

    uint_fast16_t mask = ((1 << width) - 1);
    uint_fast16_t value = data & mask;

    data >>= width;
    size -= width;
    return value;
}

inline void istream_bit_buffer::align()
{
    uint_fast8_t gap = size & 7;

    data >>= gap;
    size -= gap;
}

/*
 * メモリ上のデータを使う
 */
class istream_byte_array
{
protected:
    istream_bit_buffer buffer;
    byte_array::iterator pointer;
public:
    byte_array data;

public:
    istream_byte_array();
    void reset_pointer();
    uint_fast16_t read(uint_fast8_t width);
    void align();
};

// istream_byte_array::メンバー

inline istream_byte_array::istream_byte_array()
    : buffer()
    , pointer()
    , data()
{ /*NOOP*/ }

inline void istream_byte_array::reset_pointer()
{
    pointer = data.begin();
}

inline uint_fast16_t istream_byte_array::read(uint_fast8_t width)
{
    while (!buffer.check(width))
    {
        if (pointer == data.end())
            throw inflate_error(inflate_error_input);
        buffer.push(*pointer++);
    }
    return buffer.pop(width);
}

inline void istream_byte_array::align()
{
    buffer.align();
}

/*
 * ファイル ポインタを使う
 */
class istream_file_pointer
    : public istream_bit_buffer
{
protected:
    FILE *pointer;
public:
    istream_file_pointer(const char *path);
    ~istream_file_pointer();
    uint_fast16_t read(uint_fast8_t width);
};

// istream_file_pointer::メンバー

inline istream_file_pointer::istream_file_pointer(const char *path)
    : pointer(fopen(path, "rb"))
{
    if (pointer == nullptr)
        throw inflate_error(inflate_error_input);
}

inline istream_file_pointer::~istream_file_pointer()
{
    if (pointer != nullptr)
        fclose(pointer);
}

inline uint_fast16_t istream_file_pointer::read(uint_fast8_t width)
{
    while (!check(width))
    {
        int c = fgetc(pointer);
        if (c == EOF)
            throw inflate_error(inflate_error_input);
        push(uint_fast8_t(c));
    }
    return pop(width);
}

/*
 * リトル・エンディアンでの 32bit 読み込み
 */
template <typename iT>
inline uint_fast32_t read_uint32le(iT &istream)
{
    uint_fast32_t b0 = istream.read(8);
    uint_fast32_t b1 = istream.read(8);
    uint_fast32_t b2 = istream.read(8);
    uint_fast32_t b3 = istream.read(8);
    return ((b0 << 000) |
            (b1 << 010) |
            (b2 << 020) |
            (b3 << 030));
}

/////////////////////////////////////////////////////////////////////
//
// バイトデータの出力用クラス
//
// 必須メソッド:
//     void write(uint_fast8_t literal);
//
/////////////////////////////////////////////////////////////////////

/*
 * 解凍結果をメモリに保持する
 */
class ostream_byte_array
    : public byte_array
{
public:
    void write(uint_fast8_t literal);
};

// ostream_byte_array::メンバー

inline void ostream_byte_array::write(uint_fast8_t literal)
{
    push_back(literal);
}

/*
 * 解凍結果を直接出力する
 */
class ostream_file_pointer
{
protected:
    FILE *pointer;
public:
    ostream_file_pointer(const char *path);
    ~ostream_file_pointer();
    void write(uint_fast8_t literal);
};

// ostream_file_pointer::メンバー

inline ostream_file_pointer::ostream_file_pointer(const char *path)
    : pointer((path != nullptr) ? fopen(path, "wb") : nullptr)
{
    if (path != nullptr && pointer == nullptr)
        throw inflate_error(inflate_error_output);
}

inline ostream_file_pointer::~ostream_file_pointer()
{
    if (pointer != nullptr)
        fclose(pointer);
}

inline void ostream_file_pointer::write(uint_fast8_t literal)
{
    if (pointer != nullptr &&
        fputc(int(literal), pointer) == EOF)
        throw inflate_error(inflate_error_output);
}

/*
 * 出力とチェック・サムを行う
 */
template <typename oT, typename cT>
class ostream_checksum
{
protected:
    oT &stream;
    cT checksum;
    size_t size;

public:
    ostream_checksum(oT &stream);

    void write(uint_fast8_t literal);
    uint_fast32_t get_sum();
    size_t get_size();
};

// ostream_checksum::メンバー

template <typename oT, typename cT>
inline ostream_checksum<oT, cT>::ostream_checksum(oT &s)
    : stream(s)
    , checksum()
    , size(0)
{ /*NOOP*/ }

template <typename oT, typename cT>
inline void ostream_checksum<oT, cT>::write(uint_fast8_t literal)
{
    stream.write(literal);
    checksum.update(literal);
    ++size;
}

template <typename oT, typename cT>
inline uint_fast32_t ostream_checksum<oT, cT>::get_sum()
{
    return checksum.get_value();
}

template <typename oT, typename cT>
inline size_t ostream_checksum<oT, cT>::get_size()
{
    return size;
}

/////////////////////////////////////////////////////////////////////
//
// ハフマン符号
//
/////////////////////////////////////////////////////////////////////

/*
 * ハフマン符号のビット幅に対する文字(値)の一覧
 */
struct length_value {
    uint8_t length;     // ビット数
    uint8_t step;       // 検索用差分
    uint16_t code;      // 開始符号
    ushort_array value; // 文字(値)の一覧
};

// ハフマン符号のビット幅情報一覧
typedef std::vector<length_value> vector_length_value;

/*
 * 動的辞書
 */
class dynamic_dictionary
    : public vector_length_value
{
public:
    dynamic_dictionary(size_t length);
    void add(uint_fast8_t length, uint_fast16_t value);
    void add(uint8_t *table, uint_fast16_t count);
    bool build();

    template <typename T>
    int_fast16_t decode(T &istream) const;

    ushort_array length_list() const;
};

// dynamic_dictionary::メンバー

inline dynamic_dictionary::dynamic_dictionary(size_t length)
    : vector_length_value(length + 1)
{
    for (iterator it = begin(); it != end(); ++it)
        (*it).length = it - begin();
}

inline void dynamic_dictionary::add(uint_fast8_t length, uint_fast16_t value)
{
    if (length != 0)
        at(length).value.push_back(value);
}

inline void dynamic_dictionary::add(uint8_t *table, uint_fast16_t count)
{
    for (uint_fast16_t value = 0; value < count; value++)
        add(table[value], value);
}

inline bool dynamic_dictionary::build()
{
    // 文字(値)が存在しないものを削除
    for (iterator it = begin(); it != end();)
        if ((*it).value.size() == 0)
            it = erase(it);
        else
            ++it;
    if (size() == 0)
        return false;

    // 復号情報を生成
    size_t last_length = 0;
    uint_fast32_t code = 0;

    for (iterator it = begin(); it != end(); ++it)
    {
        length_value &lv = *it;
        uint_fast8_t length = lv.length;
        uint_fast8_t step = length - last_length;
        size_t count = lv.value.size();

        code <<= step;

        lv.step = step;
        lv.code = uint16_t(code);

        last_length = length;
        code += uint_fast32_t(count);
    }
    return (code <= (1 << last_length));
}

template <typename T>
inline int_fast16_t dynamic_dictionary::decode(T &istream) const
{
    uint_fast8_t pending = 0;
    uint_fast16_t code = 0;

    // 符号の範囲検索による復号
    for (const_iterator it = begin(); it != end(); ++it)
    {
        const length_value &lv = *it;
        uint_fast8_t step = lv.step;
        uint_fast16_t start = lv.code;
        uint_fast16_t count = uint_fast16_t(lv.value.size());
        uint_fast16_t end = start + count;

        code <<= step;
        pending += step;
        if (code < end)
        {
            code |= inverse_bits(istream.read(pending), pending);
            if (code < end)
                return lv.value[code - start];
            pending = 0;
        }
    }
    return -1;
}

// デバッグ用
ushort_array dynamic_dictionary::length_list() const
{
    uint_fast16_t vmax = 0;

    for (const_iterator it = begin(); it != end(); ++it)
    {
        const length_value &lv = *it;

        for (ushort_array::const_iterator vit = lv.value.begin(); vit != lv.value.end(); ++vit)
            vmax = (vmax > *vit) ? vmax : *vit;
    }

    ushort_array list(vmax+1, 0);

    for (const_iterator it = begin(); it != end(); ++it)
    {
        const length_value &lv = *it;

        for (ushort_array::const_iterator vit = lv.value.begin(); vit != lv.value.end(); ++vit)
            list[*vit] = lv.length;
    }

    return list;
}

/*
 * 文字(長)の固定辞書
 */
struct fixed_litlen
{
    template <typename T>
    int_fast16_t decode(T &istream) const;
};

// fixed_litlen::メンバー

template <typename T>
inline int_fast16_t fixed_litlen::decode(T &istream) const
{
    int_fast16_t code;

    code = inverse_uint8(uint_fast8_t(istream.read(7) << 1));
    if (code < 0x18)
        return (code + 256);

    code = (code << 1) | istream.read(1);
    if (code < 0xc0)
        return (code - 0x30);
    if (code < 0xc8)
        return (code - 0xc0 + 280);

    code = (code << 1) | istream.read(1);
    return (code - 0x190 + 144);
}

/*
 * 距離の固定辞書
 */
struct fixed_distance
{
    template <typename T>
    int_fast16_t decode(T &istream) const;
};

// fixed_distance::メンバー

template <typename T>
inline int_fast16_t fixed_distance::decode(T &istream) const
{
    return inverse_uint8(uint_fast8_t(istream.read(5) << 3));
}

/////////////////////////////////////////////////////////////////////
//
// DEFLATE ブロックの解凍
//
/////////////////////////////////////////////////////////////////////

/*
 * LZ 復号用ウインドウ
 */
class lz_window
{
protected:
    uint16_t mask;
    uint16_t position;
    uint8_t window[32768];

public:
    lz_window();
    uint_fast16_t get_position(uint_fast16_t distance = 0);
    uint_fast8_t get(uint_fast16_t position);
    void put(uint_fast8_t literal);
};

inline lz_window::lz_window()
    : mask(0x7fff)
    , position(0)
{ /*NOOP*/ }

inline uint_fast16_t lz_window::get_position(uint_fast16_t distance)
{
    return (position - distance);
}

inline uint_fast8_t lz_window::get(uint_fast16_t index)
{
    return window[index & mask];
}

inline void lz_window::put(uint_fast8_t literal)
{
    window[position++ & mask] = literal;
}

/*
 * BTYPE=00
 */
template <typename iT, typename oT>
inline void inflate_block_type00(lz_window &lz_buffer, iT &istream, oT &ostream)
{
    DEBUG_MESSAGE("BTYPE=00\n");

    uint_fast16_t len = istream.read(16);
    uint_fast16_t nlen = istream.read(16);

    if (len != uint16_t(~nlen))
        throw inflate_error(inflate_error_btype00_len);
    while (len-- > 0)
    {
        uint_fast16_t literal(istream.read(8));
        lz_buffer.put(uint_fast8_t(literal));
        ostream.write(uint_fast8_t(literal));
    }
}

/*
 * BTYPE=01, BTYPE=10 の圧縮データの解凍
 */
template <typename iT, typename oT, typename lT, typename dT>
inline void inflate_block_data(lz_window &lz_buffer,
                               iT &istream,
                               oT &ostream,
                               const lT &litlen_dictionary,
                               const dT &distance_dictionary)
{
    VERBOSE("COMPRESSED DATA:\n");

    for (;;)
    {
        int_fast16_t code;

        code = litlen_dictionary.decode(istream);
        if (code < 0)
            throw inflate_error(inflate_error_decode_litlen);

        uint_fast16_t literal = uint_fast16_t(code);

        if (literal < 256)
        {
            lz_buffer.put(uint_fast8_t(literal));
            ostream.write(uint_fast8_t(literal));
            VERBOSE("  LITERAL[%#04x]: %c\n", literal, (std::isprint(literal) ? literal : ' '));
            continue;
        }

        if (literal == 256)
        {
            VERBOSE("  END:\n");
            return;
        }

        uint_fast16_t length = uint_fast16_t(literal - 257 + 3);

        if (length < 11)
            /*NOOP*/;
        else if (literal < 285)
        {
            uint_fast16_t lm7 = length - 7;
            uint_fast8_t bits = uint_fast8_t(lm7 >> 2);
            uint_fast16_t offs = (lm7 & 3) + 4;

            length = 3 + (offs << bits) + istream.read(bits);
        }
        else if (literal < 286)
            length = 258;
        else
            throw inflate_error(inflate_error_litlen_value);

        code = distance_dictionary.decode(istream);
        if (code < 0)
            throw inflate_error(inflate_error_decode_distance);

        uint_fast16_t distance = uint_fast16_t(code + 1);

        if (distance < 5)
            /*NOOP*/;
        else if (distance < 31)
        {
            uint_fast16_t dm3 = distance - 3;
            uint_fast8_t bits = uint_fast8_t(dm3 >> 1);
            uint_fast16_t offs = (dm3 & 1) + 2;

            distance = 1 + (offs << bits) + istream.read(bits);
        }
        else
            throw inflate_error(inflate_error_distance_value);

        VERBOSE("  LENGTH=%d, DISTANCE=%d\n", length, distance);

        uint_fast16_t winpos = lz_buffer.get_position(distance);

        do
        {
            uint_fast8_t data(lz_buffer.get(winpos++));
            lz_buffer.put(data);
            ostream.write(data);
        }
        while (--length > 0);
    }
}

/*
 * BTYPE=01
 */
template <typename iT, typename oT>
inline void inflate_block_type01(lz_window &lz_buffer, iT &istream, oT &ostream)
{
    DEBUG_MESSAGE("BTYPE=01\n");

    fixed_litlen litlen;
    fixed_distance distance;
    inflate_block_data(lz_buffer, istream, ostream, litlen, distance);
}

/*
 * BTYPE=10
 */
template <typename iT>
inline bool inflate_block_type10_dynamic(iT &istream, const
                                         dynamic_dictionary &code,
                                         uint_fast16_t length_count,
                                         uint8_t *output)
{
    uint_fast16_t last = 0;
    uint_fast16_t value = 0;

    while (value < length_count)
    {
        int_fast16_t length = code.decode(istream);

        if (length < 0)
        {
            VERBOSE("  inflate_block_type10_dynamic: invalid length\n");
            return false;
        }
        if (length < 16)
        {
            output[value++] = length;
            last = length;
            continue;
        }

        uint_fast8_t count = 0;

        if (length == 16)
            count = uint_fast8_t(istream.read(2) + 3);
        else
        {
            last = 0;
            if (length == 17)
                count = uint_fast8_t(istream.read(3) + 3);
            else
                count = uint_fast8_t(istream.read(7) + 11);
        }
        while (count-- > 0)
        {
            if (value >= length_count)
            {
                VERBOSE("  inflate_block_type10_dynamic overflow: %d > %d, %d\n", value, length_count, last);
                return false;
            }
            output[value++] = last;
        }
    }
    return true;
}

template <typename iT, typename oT>
inline void inflate_block_type10(lz_window &lz_buffer, iT &istream, oT &ostream)
{
    DEBUG_MESSAGE("BTYPE=10\n");

    static const uint8_t code_order[19] = {
        16, 17, 18,
        0,  8,  7,  9,  6, 10,  5, 11,
        4, 12,  3, 13,  2, 14,  1, 15
    };

    dynamic_dictionary code(7);
    dynamic_dictionary litlen(15);
    dynamic_dictionary distance(15);

    uint_fast16_t hlit = istream.read(5) + 257;
    uint_fast16_t hdist = istream.read(5) + 1;
    uint_fast8_t hclen = istream.read(4) + 4;

    VERBOSE("%8s: %d + 257 = %d\n", "HLIT", hlit-257, hlit);
    VERBOSE("%8s: %d + 1 = %d\n", "HDIST", hdist-1, hdist);
    VERBOSE("%8s: %d + 4 = %d\n", "HCLEN", hclen-4, hclen);

    uint8_t code_length[289+33+2];

    memset(code_length, 0, 19);
    for (uint_fast8_t index = 0; index < hclen; ++index)
        code_length[code_order[index]] = uint8_t(istream.read(3));
    for (uint_fast8_t index = 0; index < 19; ++index)
        code.add(code_length[index], index);

    if (flag_verbose)
    {
        byte_array code_data;
        for (uint_fast8_t index = 0; index < hclen; ++index)
            code_data.push_back(code_length[code_order[index]]);
        verbose_list(code_data, "  code length", 4);
    }

    if (!code.build())
        throw inflate_error(inflate_error_dictionary_code_length);

    memset(code_length, 0, sizeof(code_length));
    if (!inflate_block_type10_dynamic(istream, code, hlit + hdist, code_length))
        throw inflate_error(inflate_error_dictionary_decode_length);

    litlen.add(code_length, hlit);
    if (!litlen.build())
        throw inflate_error(inflate_error_dictionary_litlen_length);
    if (flag_verbose)
        verbose_list(litlen.length_list(), "  literal/length code length", 4);

    distance.add(code_length + hlit, hdist);
    if (!distance.build())
        throw inflate_error(inflate_error_dictionary_distance_length);
    if (flag_verbose)
        verbose_list(distance.length_list(), "  distance code length", 4);

    inflate_block_data(lz_buffer, istream, ostream, litlen, distance);
}

/*
 * ブロックの解凍
 */
template <typename iT, typename oT>
void inflate_block(iT &istream, oT &ostream)
{
    lz_window lz_buffer;
    bool bfinal;

    VERBOSE("======== BLOCK ========\n");
    do
    {
        bfinal = istream.read(1);
        uint_fast16_t btype = istream.read(2);
        VERBOSE("%8s: %d\n", "BFINAL", bfinal);
        switch (btype)
        {
        case 0:
            istream.align();
            VERBOSE("%8s: 00\n", "BTYPE");
            inflate_block_type00(lz_buffer, istream, ostream);
            break;
        case 1:
            VERBOSE("%8s: 01\n", "BTYPE");
            inflate_block_type01(lz_buffer, istream, ostream);
            break;
        case 2:
            VERBOSE("%8s: 10\n", "BTYPE");
            inflate_block_type10(lz_buffer, istream, ostream);
            break;
        default:
            ERROR_MESSAGE("unknown BTYPE=11\n");
            throw inflate_error(inflate_error_block_type);
        }
    }
    while (!bfinal);
    istream.align();
}

/////////////////////////////////////////////////////////////////////
//
// GZIP 圧縮データの解凍
//
//   class gzip;
//
/////////////////////////////////////////////////////////////////////

class gzip
{
public:
    uint8_t id1, id2;
    uint8_t cm;
    uint8_t flg;
    uint32_t mtime;
    uint8_t xfl;
    uint8_t os;
    byte_array extra;
    std::string name;
    std::string comment;
    uint16_t hcrc;
    uint32_t crc;
    uint32_t isize;

    gzip();

    bool flg_ftext() const;
    bool flg_fhcrc() const;
    bool flg_fextra() const;
    bool flg_fname() const;
    bool flg_fcomment() const;

    template <typename iT>
    bool read_header(iT &istream);

    template <typename iT, typename oT>
    void inflate(iT &istream, oT &ostream);
};

// gzip::メンバー

gzip::gzip()
    : id1(0)
    , id2(0)
    , cm(0)
    , flg(0)
    , mtime(0)
    , xfl(0)
    , os(0)
    , extra()
    , name()
    , comment()
    , hcrc(0)
    , crc(0)
    , isize(0)
{ /*NOOP*/ }

inline bool gzip::flg_ftext() const
{
    return (flg & 0x01);
}

inline bool gzip::flg_fhcrc() const
{
    return (flg & 0x02);
}

inline bool gzip::flg_fextra() const
{
    return (flg & 0x04);
}

inline bool gzip::flg_fname() const
{
    return (flg & 0x08);
}

inline bool gzip::flg_fcomment() const
{
    return (flg & 0x10);
}

template <typename iT>
bool gzip::read_header(iT &istream)
{
    crc32 crc16;

    DEBUG_MESSAGE("gzip::read:\n");

    id1 = uint8_t(crc16.update(istream.read(8)));
    if (id1 != 0x1f)
    {
        ERROR_MESSAGE("unknown ID1: %#04x\n", id1);
        return false;
    }

    id2 = uint8_t(crc16.update(istream.read(8)));
    if (id2 != 0x8b)
    {
        ERROR_MESSAGE("unknown ID2: %#04x\n", id2);
        return false;
    }

    VERBOSE("FILE MAGIC: %#04x,%#04x\n", id1, id2);

    cm = uint8_t(crc16.update(istream.read(8)));
    if (cm != 8)
    {
        ERROR_MESSAGE("unknown method: CM=%d\n", cm);
        return false;
    }
    VERBOSE("%8s: %#04x\n", "CM", cm);

    flg = uint8_t(crc16.update(istream.read(8)));
    VERBOSE("%8s: %#04x\n", "FLG", flg);

    {
        uint_fast32_t b0 = crc16.update(istream.read(8));
        uint_fast32_t b1 = crc16.update(istream.read(8));
        uint_fast32_t b2 = crc16.update(istream.read(8));
        uint_fast32_t b3 = crc16.update(istream.read(8));
        mtime = ((b0 << 000) |
                 (b1 << 010) |
                 (b2 << 020) |
                 (b3 << 030));
    }
    VERBOSE("%8s: %#010x\n", "MTIME", mtime);

    xfl = uint8_t(crc16.update(istream.read(8)));
    VERBOSE("%8s: %#04x\n", "XFL", xfl);

    os = uint8_t(crc16.update(istream.read(8)));
    VERBOSE("%8s: %#04x\n", "OS", os);

    if (flg_fextra())
    {
        uint_fast16_t b0 = crc16.update(istream.read(8));
        uint_fast16_t b1 = crc16.update(istream.read(8));
        uint_fast16_t xlen = (b0 | (b1 << 8));

        VERBOSE("%8s: XLEN=%d\n", "XLEN", xlen);
        while (xlen-- > 0)
            extra.push_back(uint8_t(crc16.update(istream.read(8))));
    }

    while (flg_fname())
    {
        uint_fast8_t ch = crc16.update(istream.read(8));
        if (ch == 0)
            break;
        name += char(ch);
    }
    if (flg_fname())
        VERBOSE("%8s: %s\n", "NAME", name.c_str());

    while (flg_fcomment())
    {
        uint_fast8_t ch = crc16.update(istream.read(8));
        if (ch == 0)
            break;
        comment += char(ch);
    }
    if (flg_fcomment())
        VERBOSE("%8s: %s\n", "COMMENT", comment.c_str());

    if (flg_fhcrc())
    {
        hcrc = uint16_t(istream.read(16));
        if (hcrc != uint16_t(crc16.get_value()))
        {
            ERROR_MESSAGE("error: HCRC: %#06x != %#06x\n", hcrc, uint16_t(crc16.get_value()));
            return false;
        }
        VERBOSE("%8s: %#06x\n", "HCRC", hcrc);
    }
    return true;
}

template <typename iT, typename oT>
void gzip::inflate(iT &istream, oT &ostream)
{
    ostream_checksum<oT, crc32> output(ostream);

    if (!read_header(istream))
        throw inflate_error(inflate_error_header);

    inflate_block(istream, output);

    crc = uint32_t(read_uint32le(istream));
    if (output.get_sum() != crc)
    {
        ERROR_MESSAGE("CRC32 ERROR: %#010x != %#010x\n", crc, output.get_sum());
        throw inflate_error(inflate_error_checksum);
    }
    VERBOSE("CRC32: %#010x (%#010x)\n", crc, output.get_sum());

    isize = uint32_t(read_uint32le(istream));
    if (output.get_size() != isize)
    {
        ERROR_MESSAGE("ISIZE ERROR: %#010x != %#010zx\n", isize, output.get_size());
        throw inflate_error(inflate_error_size);
    }
    VERBOSE("ISIZE: %#010x (%#010zx)\n", isize, output.get_size());
}

/////////////////////////////////////////////////////////////////////
//
// ファイル I/O
//
/////////////////////////////////////////////////////////////////////

static bool file_read(const char *path, byte_array &data)
{
    FILE *fp = fopen(path, "rb");
    if (fp == nullptr)
        return false;
    while (!feof(fp))
    {
        int c = fgetc(fp);
        if (c == EOF)
            break;
        data.push_back(uint8_t(c));
    }
    fclose(fp);
    return !ferror(fp);
}

static bool file_write(const char *path, const byte_array &data)
{
    FILE *fp = fopen(path, "wb");
    if (fp == nullptr)
        return false;
    for (byte_array::const_iterator it = data.begin(); it != data.end(); ++it)
        if (fputc(*it, fp) == EOF)
            break;
    fclose(fp);
    return !ferror(fp);
}

/////////////////////////////////////////////////////////////////////
//
// 解凍処理
//
/////////////////////////////////////////////////////////////////////

// ファイル I/O は解凍処理の前後で行う
static int inflate_memory(const char *input, const char *output)
{
    istream_byte_array gzdata;
    ostream_byte_array original;

    if (!file_read(input, gzdata.data))
    {
        ERROR_MESSAGE("read error: %s\n", input);
        return 1;
    }
    gzdata.reset_pointer();

    try
    {
        gzip().inflate(gzdata, original);
    }
    catch (std::exception &e)
    {
        ERROR_MESSAGE("inflate error: %s\n", e.what());
        return 1;
    }

    if (output != nullptr &&
        !file_write(output, original))
    {
        ERROR_MESSAGE("write error: %s\n", output);
        return 1;
    }
    return 0;
}

// ファイル I/O は解凍処理中に直接行う
static int inflate_direct(const char *input, const char *output)
{
    try
    {
        istream_file_pointer inp(input);
        ostream_file_pointer out(output);

        gzip().inflate(inp, out);
    }
    catch (std::exception &e)
    {
        ERROR_MESSAGE("inflate error: %s\n", e.what());
        return 1;
    }
    return 0;
}

/////////////////////////////////////////////////////////////////////
//
// main / usage
//
/////////////////////////////////////////////////////////////////////

static const char *program = NULL;

static int usage()
{
    printf("Usage: %s [options] input [output]\n"
           "\n"
           "options are:\n"
           "    -D    direct I/O mode.\n"
           "    -M    on memory mode.\n"
           "    -d    set debug mode.\n"
           "    -v    set verbose mode.\n"
           "\n"
           , program);
    return 1;
}

static char *shift_arg(int &argc, char **&argv)
{
    char *arg = NULL;

    if (argc > 0)
    {
        arg = *argv;
        --argc;
        ++argv;
    }
    return arg;
}

#define shift()  (shift_arg(argc, argv))

int main(int argc, char **argv)
{
    int new_argc = 0;
    char **new_argv = argv;

    enum main_mode
    {
        mode_memory,
        mode_direct,
    }
    mode = mode_memory;

    program = shift();
    while (argc > 0)
    {
        char *arg = shift();

        if ((arg[0] != '-') || (arg[1] == 0))
        {
            new_argv[new_argc++] = arg;
            continue;
        }

        int s_opt;

        while ((s_opt = *(++arg)) != 0)
        {
            switch (s_opt)
            {
            case 'D':
                mode = mode_direct;
                continue;
            case 'M':
                mode = mode_memory;
                continue;

            case 'd':
                flag_debug = true;
                continue;
            case 'v':
                flag_verbose = true;
                continue;
            default:
                return usage();
            }
        }
    }

    argc = new_argc;
    argv = new_argv;

    const char *input = nullptr;
    const char *output = nullptr;

    if (argc < 1)
        return usage();
    input = argv[0];
    if (argc > 1)
        output = argv[1];
    else
        flag_verbose = true;

    switch (mode)
    {
    case mode_memory:
        return inflate_memory(input, output);
    case mode_direct:
        return inflate_direct(input, output);

    default:
        ERROR_MESSAGE("BUG!!");
        return 1;
    }
}

/////////////////////////////////////////////////////////////////////
// Local Variables:
// c-file-style: "ikiuo"
// End:


0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0