7
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

C++ JSONパーサー自作シリーズ

Part1 再帰下降 Part2 エラー処理 Part3 SIMD高速化
👈 Now - -

はじめに

今回は再帰下降パーサーでJSONパーサーを作ってみよう。

JSONの文法

まず、JSONの構造を確認。

value   := object | array | string | number | "true" | "false" | "null"

object  := "{" [ member ("," member)* ] "}"
member  := string ":" value

array   := "[" [ value ("," value)* ] "]"

string  := '"' characters '"'
number  := ["-"] ("0" | digit1-9 digit*) ["." digit+] [("e"|"E") ["+"|"-"] digit+]

意外とシンプル。これをそのままコードにする。

JSONの値を表す型

C++17のstd::variantを使う。

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <variant>
#include <vector>

// 前方宣言
struct JsonValue;

// One alias per JSON value kind; together they are the alternatives held by
// JsonValue::data. Arrays and objects refer back to JsonValue, which is why
// the struct is forward-declared above.
using JsonNull = std::nullptr_t;
using JsonBool = bool;
using JsonNumber = double;
using JsonString = std::string;
using JsonArray = std::vector<JsonValue>;
using JsonObject = std::map<std::string, JsonValue>;

/// A single JSON value: null, boolean, number, string, array or object.
///
/// The active kind is stored in `data`. The `is_*` functions query the kind;
/// the `as_*` functions and `operator[]` use `std::get`, so they throw
/// `std::bad_variant_access` when the value holds a different kind.
struct JsonValue {
    std::variant<
        JsonNull,
        JsonBool,
        JsonNumber,
        JsonString,
        JsonArray,
        JsonObject
    > data;

    // Constructors. They are intentionally implicit so that plain C++ values
    // convert to JsonValue. A default-constructed value is JSON null.
    JsonValue() : data(nullptr) {}
    JsonValue(std::nullptr_t) : data(nullptr) {}
    JsonValue(bool b) : data(b) {}
    JsonValue(double n) : data(n) {}
    JsonValue(int n) : data(static_cast<double>(n)) {}
    JsonValue(const std::string& s) : data(s) {}
    JsonValue(const char* s) : data(std::string(s)) {}
    JsonValue(const JsonArray& a) : data(a) {}
    JsonValue(const JsonObject& o) : data(o) {}

    // Rvalue overloads: take over the buffer of a temporary or std::move'd
    // string/array/object instead of deep-copying it.
    JsonValue(std::string&& s) : data(std::move(s)) {}
    JsonValue(JsonArray&& a) : data(std::move(a)) {}
    JsonValue(JsonObject&& o) : data(std::move(o)) {}

    // Kind checks.
    bool is_null() const { return std::holds_alternative<JsonNull>(data); }
    bool is_bool() const { return std::holds_alternative<JsonBool>(data); }
    bool is_number() const { return std::holds_alternative<JsonNumber>(data); }
    bool is_string() const { return std::holds_alternative<JsonString>(data); }
    bool is_array() const { return std::holds_alternative<JsonArray>(data); }
    bool is_object() const { return std::holds_alternative<JsonObject>(data); }

    // Typed accessors.
    bool as_bool() const { return std::get<JsonBool>(data); }
    double as_number() const { return std::get<JsonNumber>(data); }
    const std::string& as_string() const { return std::get<JsonString>(data); }
    const JsonArray& as_array() const { return std::get<JsonArray>(data); }
    const JsonObject& as_object() const { return std::get<JsonObject>(data); }

    /// Mutable array element access. `index` is not bounds-checked.
    JsonValue& operator[](size_t index) {
        return std::get<JsonArray>(data)[index];
    }

    /// Read-only array element access; throws `std::out_of_range` when
    /// `index` is past the end.
    const JsonValue& operator[](size_t index) const {
        return std::get<JsonArray>(data).at(index);
    }

    /// Mutable object member access. Like `std::map::operator[]`, a missing
    /// key is inserted with a null value.
    JsonValue& operator[](const std::string& key) {
        return std::get<JsonObject>(data)[key];
    }

    /// Read-only object member access; throws `std::out_of_range` when `key`
    /// is absent (a const object cannot insert).
    const JsonValue& operator[](const std::string& key) const {
        return std::get<JsonObject>(data).at(key);
    }
};

パーサーの構造

/// Recursive-descent JSON parser.
///
/// `parse()` copies the text into the parser and walks it with a cursor
/// (`pos_`). Each `parse_*` member consumes one grammar production starting at
/// the cursor and leaves the cursor just past it. Syntax errors are reported
/// as `std::runtime_error`.
class JsonParser {
public:
    /// Parses `json` as one JSON document and returns its value.
    ///
    /// Whitespace is allowed around the value; anything else after it throws
    /// `std::runtime_error("Unexpected trailing characters")`.
    JsonValue parse(const std::string& json) {
        input_ = json;
        pos_ = 0;

        skip_whitespace();
        auto result = parse_value();
        skip_whitespace();

        if (pos_ != input_.size()) {
            throw std::runtime_error("Unexpected trailing characters");
        }

        return result;
    }

private:
    std::string input_;  // text being parsed
    size_t pos_ = 0;     // index of the next unread character in input_

    /// True once every character of input_ has been consumed.
    bool at_end() const {
        return pos_ >= input_.size();
    }

    /// Returns the next character without consuming it, or '\0' at the end of
    /// input. A literal NUL byte in the text looks the same as end of input
    /// to callers of this function.
    char peek() const {
        return pos_ < input_.size() ? input_[pos_] : '\0';
    }

    /// Consumes and returns the next character.
    /// Throws instead of moving the cursor past the end of input, so pos_
    /// never exceeds input_.size() through this function.
    char advance() {
        if (at_end()) {
            throw std::runtime_error("Unexpected end of input");
        }
        return input_[pos_++];
    }

    /// Consumes the next character only if it equals `c`; reports whether it
    /// did.
    bool match(char c) {
        if (peek() == c) {
            advance();
            return true;
        }
        return false;
    }

    /// Consumes `c` or throws std::runtime_error describing what was found.
    void expect(char c) {
        if (match(c)) {
            return;
        }

        std::string message = std::string("Expected '") + c + "' but got ";
        if (at_end()) {
            // Appending peek()'s '\0' here would cut the message short when
            // it is read back through what().
            message += "end of input";
        } else {
            message += '\'';
            message += peek();
            message += '\'';
        }
        throw std::runtime_error(message);
    }

    /// Skips spaces, tabs, line feeds and carriage returns.
    void skip_whitespace() {
        while (pos_ < input_.size()) {
            char c = peek();
            if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                advance();
            } else {
                break;
            }
        }
    }

    // Grammar productions (defined out of line below).
    JsonValue parse_value();
    JsonValue parse_null();
    JsonValue parse_bool();
    JsonValue parse_number();
    JsonValue parse_string();
    JsonValue parse_array();
    JsonValue parse_object();
};

値のパース

/// Parses any JSON value by dispatching on its first non-whitespace character.
///
/// Throws std::runtime_error when the input ends before a value starts or
/// when the next character cannot begin a value.
JsonValue JsonParser::parse_value() {
    skip_whitespace();

    // peek() returns '\0' at end of input; putting that into the message
    // below would truncate it at the NUL, so report this case separately.
    if (pos_ >= input_.size()) {
        throw std::runtime_error("Unexpected end of input");
    }

    const char c = peek();

    switch (c) {
        case 'n': return parse_null();
        case 't':
        case 'f': return parse_bool();
        case '"': return parse_string();
        case '[': return parse_array();
        case '{': return parse_object();
        case '-':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            return parse_number();
        default:
            throw std::runtime_error(
                std::string("Unexpected character: ") + c
            );
    }
}

null/true/falseのパース

/// Parses the literal `null` at the cursor.
/// Throws std::runtime_error("Invalid null") when the next four characters
/// are anything else.
JsonValue JsonParser::parse_null() {
    const bool is_literal = input_.compare(pos_, 4, "null") == 0;
    if (!is_literal) {
        throw std::runtime_error("Invalid null");
    }

    pos_ += 4;
    return JsonValue(nullptr);
}

/// Parses the literal `true` or `false` at the cursor.
/// Throws std::runtime_error("Invalid boolean") when neither literal is
/// present.
JsonValue JsonParser::parse_bool() {
    const bool is_true = input_.compare(pos_, 4, "true") == 0;
    if (is_true) {
        pos_ += 4;
        return JsonValue(true);
    }

    const bool is_false = input_.compare(pos_, 5, "false") == 0;
    if (!is_false) {
        throw std::runtime_error("Invalid boolean");
    }

    pos_ += 5;
    return JsonValue(false);
}

数値のパース

/// Parses a JSON number at the cursor and returns it as a double.
///
/// The text is validated by hand in the order sign, integer part, fraction,
/// exponent, and the validated slice is then converted with std::stod.
/// A leading '0' ends the integer part, so "01" stops after the zero.
/// Throws std::runtime_error for malformed numbers; exceptions raised by
/// std::stod itself are not translated.
JsonValue JsonParser::parse_number() {
    const size_t begin = pos_;

    const auto at_digit = [this] {
        const char ch = peek();
        return ch >= '0' && ch <= '9';
    };
    const auto consume_digits = [&] {
        while (at_digit()) {
            advance();
        }
    };

    // Optional minus sign.
    if (peek() == '-') {
        advance();
    }

    // Integer part: a lone zero, or a non-zero digit followed by more digits.
    const char lead = peek();
    if (lead == '0') {
        advance();
    } else if (lead >= '1' && lead <= '9') {
        consume_digits();
    } else {
        throw std::runtime_error("Invalid number");
    }

    // Optional fraction: '.' followed by at least one digit.
    if (peek() == '.') {
        advance();
        if (!at_digit()) {
            throw std::runtime_error("Invalid number: expected digit after decimal point");
        }
        consume_digits();
    }

    // Optional exponent: e/E, optional sign, at least one digit.
    const char marker = peek();
    if (marker == 'e' || marker == 'E') {
        advance();
        const char sign = peek();
        if (sign == '+' || sign == '-') {
            advance();
        }
        if (!at_digit()) {
            throw std::runtime_error("Invalid number: expected digit in exponent");
        }
        consume_digits();
    }

    return JsonValue(std::stod(input_.substr(begin, pos_ - begin)));
}

文字列のパース

/// Parses a double-quoted JSON string at the cursor and returns its decoded
/// contents as a string value.
///
/// Escape sequences are decoded; `\uXXXX` escapes are written out as UTF-8.
/// A high surrogate (\uD800-\uDBFF) must be followed immediately by a
/// `\uXXXX` low surrogate (\uDC00-\uDFFF); the pair is combined into one code
/// point and encoded as four bytes.
///
/// Throws std::runtime_error for an unterminated string, an unknown escape,
/// a `\u` escape that is not exactly four hex digits, or an unpaired
/// surrogate.
JsonValue JsonParser::parse_string() {
    expect('"');

    // Reads exactly four hex digits at the cursor and returns their value.
    // Done by hand because std::stoi would also accept signs, whitespace,
    // a "0x" prefix and fewer than four digits.
    const auto read_hex4 = [this]() -> unsigned {
        unsigned value = 0;
        for (int i = 0; i < 4; ++i) {
            if (pos_ >= input_.size()) {
                throw std::runtime_error("Invalid Unicode escape: expected 4 hex digits");
            }
            const char ch = input_[pos_];
            unsigned digit = 0;
            if (ch >= '0' && ch <= '9') {
                digit = static_cast<unsigned>(ch - '0');
            } else if (ch >= 'a' && ch <= 'f') {
                digit = static_cast<unsigned>(ch - 'a') + 10u;
            } else if (ch >= 'A' && ch <= 'F') {
                digit = static_cast<unsigned>(ch - 'A') + 10u;
            } else {
                throw std::runtime_error("Invalid Unicode escape: expected 4 hex digits");
            }
            value = (value << 4) | digit;
            ++pos_;
        }
        return value;
    };

    // Appends code point `cp` (at most U+10FFFF, not a surrogate) as UTF-8.
    const auto append_utf8 = [](std::string& out, unsigned cp) {
        if (cp < 0x80) {
            out += static_cast<char>(cp);
        } else if (cp < 0x800) {
            out += static_cast<char>(0xC0 | (cp >> 6));
            out += static_cast<char>(0x80 | (cp & 0x3F));
        } else if (cp < 0x10000) {
            out += static_cast<char>(0xE0 | (cp >> 12));
            out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            out += static_cast<char>(0x80 | (cp & 0x3F));
        } else {
            out += static_cast<char>(0xF0 | (cp >> 18));
            out += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
            out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            out += static_cast<char>(0x80 | (cp & 0x3F));
        }
    };

    std::string text;

    while (peek() != '"') {
        // peek() yields '\0' at end of input.
        if (peek() == '\0') {
            throw std::runtime_error("Unterminated string");
        }

        if (peek() != '\\') {
            text += advance();
            continue;
        }

        advance();  // consume the backslash
        if (pos_ >= input_.size()) {
            throw std::runtime_error("Unterminated string");
        }

        const char escaped = advance();
        switch (escaped) {
            case '"':  text += '"'; break;
            case '\\': text += '\\'; break;
            case '/':  text += '/'; break;
            case 'b':  text += '\b'; break;
            case 'f':  text += '\f'; break;
            case 'n':  text += '\n'; break;
            case 'r':  text += '\r'; break;
            case 't':  text += '\t'; break;
            case 'u': {
                unsigned codepoint = read_hex4();

                if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
                    // High surrogate: the low half must follow as \uXXXX.
                    if (input_.compare(pos_, 2, "\\u") != 0) {
                        throw std::runtime_error("Invalid Unicode escape: unpaired surrogate");
                    }
                    pos_ += 2;
                    const unsigned low = read_hex4();
                    if (low < 0xDC00 || low > 0xDFFF) {
                        throw std::runtime_error("Invalid Unicode escape: unpaired surrogate");
                    }
                    codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (low - 0xDC00);
                } else if (codepoint >= 0xDC00 && codepoint <= 0xDFFF) {
                    // A low surrogate with no preceding high surrogate.
                    throw std::runtime_error("Invalid Unicode escape: unpaired surrogate");
                }

                append_utf8(text, codepoint);
                break;
            }
            default:
                throw std::runtime_error(
                    std::string("Invalid escape sequence: \\") + escaped
                );
        }
    }

    expect('"');

    // Move the decoded text into the value instead of copying it.
    JsonValue value;
    value.data.emplace<JsonString>(std::move(text));
    return value;
}

配列のパース

/// Parses a JSON array (`[` value, value, ... `]`) at the cursor.
///
/// Elements are appended directly to the array stored inside the returned
/// value, so the finished array is never copied. Errors from `expect()` and
/// `parse_value()` propagate unchanged.
JsonValue JsonParser::parse_array() {
    expect('[');
    skip_whitespace();

    JsonValue result;
    JsonArray& items = result.data.emplace<JsonArray>();

    if (peek() != ']') {
        // One element, then one more after each comma.
        do {
            skip_whitespace();
            items.push_back(parse_value());
            skip_whitespace();
        } while (match(','));
    }

    expect(']');
    return result;
}

オブジェクトのパース

/// Parses a JSON object (`{` "key": value, ... `}`) at the cursor.
///
/// Members are stored directly in the map inside the returned value, so the
/// finished object is never copied. When a key occurs more than once, the
/// last occurrence wins. Errors from `expect()`, `parse_string()` and
/// `parse_value()` propagate unchanged.
JsonValue JsonParser::parse_object() {
    expect('{');
    skip_whitespace();

    JsonValue result;
    JsonObject& members = result.data.emplace<JsonObject>();

    if (peek() != '}') {
        // One member, then one more after each comma.
        do {
            skip_whitespace();
            JsonValue key_value = parse_string();
            std::string key = std::move(std::get<JsonString>(key_value.data));
            skip_whitespace();
            expect(':');
            skip_whitespace();
            members.insert_or_assign(std::move(key), parse_value());
            skip_whitespace();
        } while (match(','));
    }

    expect('}');
    return result;
}

使い方

#include <iostream>

int main() {
    JsonParser parser;
    
    std::string json = R"({
        "name": "Aqua",
        "age": 15,
        "skills": ["C++", "Rust", "OS開発"],
        "active": true,
        "nullable": null
    })";
    
    try {
        JsonValue value = parser.parse(json);
        
        std::cout << "Name: " << value["name"].as_string() << "\n";
        std::cout << "Age: " << value["age"].as_number() << "\n";
        std::cout << "Skills:\n";
        for (const auto& skill : value["skills"].as_array()) {
            std::cout << "  - " << skill.as_string() << "\n";
        }
        std::cout << "Active: " << (value["active"].as_bool() ? "yes" : "no") << "\n";
        std::cout << "Nullable is null: " << (value["nullable"].is_null() ? "yes" : "no") << "\n";
        
    } catch (const std::exception& e) {
        std::cerr << "Parse error: " << e.what() << "\n";
        return 1;
    }
    
    return 0;
}
Name: Aqua
Age: 15
Skills:
  - C++
  - Rust
  - OS開発
Active: yes
Nullable is null: yes

JSONの出力

パースだけじゃなくて、出力もできるようにしよう。

/// Converts a JsonValue tree back into JSON text.
///
/// In compact mode (the default) no whitespace is emitted. In pretty mode
/// each array element and object member goes on its own line, indented two
/// spaces per nesting level on top of the starting indent.
class JsonSerializer {
public:
    /// Serializes `value`.
    ///
    /// @param value  Value to serialize.
    /// @param pretty Emit newlines and indentation when true.
    /// @param indent Number of spaces already in effect for the enclosing
    ///               level (used only in pretty mode). Negative values are
    ///               treated as 0, because a negative width cannot be turned
    ///               into padding.
    /// @return The JSON text. The first line itself is not indented.
    std::string serialize(const JsonValue& value, bool pretty = false, int indent = 0) {
        pretty_ = pretty;
        indent_ = indent < 0 ? 0 : indent;
        return serialize_value(value);
    }

private:
    bool pretty_ = false;  // pretty-print mode for the current serialize() call
    int indent_ = 0;       // current indentation width in spaces

    /// Padding for the current nesting level.
    std::string padding() const {
        return std::string(static_cast<size_t>(indent_), ' ');
    }

    /// Dispatches on the kind of value held.
    std::string serialize_value(const JsonValue& value) {
        if (value.is_null()) return "null";
        if (value.is_bool()) return value.as_bool() ? "true" : "false";
        if (value.is_number()) return serialize_number(value.as_number());
        if (value.is_string()) return serialize_string(value.as_string());
        if (value.is_array()) return serialize_array(value.as_array());
        if (value.is_object()) return serialize_object(value.as_object());

        throw std::runtime_error("Unknown JSON type");
    }

    /// Formats a number. Integral values that fit in int64 are printed
    /// without a decimal point; everything else uses "%.17g".
    /// NaN and infinities have no JSON representation and are written as
    /// `null`.
    std::string serialize_number(double n) {
        if (!std::isfinite(n)) {
            return "null";
        }

        // Converting a double outside the int64 range to int64 is undefined
        // behaviour, so check the range (±2^63) before casting.
        constexpr double kInt64Limit = 9223372036854775808.0;
        if (n >= -kInt64Limit && n < kInt64Limit) {
            const auto whole = static_cast<std::int64_t>(n);
            if (n == static_cast<double>(whole)) {
                return std::to_string(whole);
            }
        }

        char buf[32];
        std::snprintf(buf, sizeof(buf), "%.17g", n);
        return buf;
    }

    /// Quotes `s` and escapes quotes, backslashes and control characters.
    /// Bytes of 0x20 and above (including UTF-8 multi-byte sequences) are
    /// copied through unchanged.
    std::string serialize_string(const std::string& s) {
        std::string result = "\"";

        for (char c : s) {
            switch (c) {
                case '"':  result += "\\\""; break;
                case '\\': result += "\\\\"; break;
                case '\b': result += "\\b"; break;
                case '\f': result += "\\f"; break;
                case '\n': result += "\\n"; break;
                case '\r': result += "\\r"; break;
                case '\t': result += "\\t"; break;
                default: {
                    const auto byte = static_cast<unsigned char>(c);
                    if (byte < 0x20) {
                        // Remaining control characters need a \u00XX escape.
                        char buf[8];
                        std::snprintf(buf, sizeof(buf), "\\u%04x",
                                      static_cast<unsigned>(byte));
                        result += buf;
                    } else {
                        result += c;
                    }
                }
            }
        }

        result += '"';
        return result;
    }

    /// Serializes an array; an empty array is always written as "[]".
    std::string serialize_array(const JsonArray& arr) {
        if (arr.empty()) return "[]";

        std::string result = "[";

        if (pretty_) {
            result += "\n";
            indent_ += 2;
        }

        for (size_t i = 0; i < arr.size(); ++i) {
            if (pretty_) {
                result += padding();
            }

            result += serialize_value(arr[i]);

            if (i + 1 < arr.size()) {
                result += ",";
            }

            if (pretty_) {
                result += "\n";
            }
        }

        if (pretty_) {
            // Back to the enclosing level for the closing bracket.
            indent_ -= 2;
            result += padding();
        }

        result += "]";
        return result;
    }

    /// Serializes an object in the map's iteration order; an empty object is
    /// always written as "{}".
    std::string serialize_object(const JsonObject& obj) {
        if (obj.empty()) return "{}";

        std::string result = "{";

        if (pretty_) {
            result += "\n";
            indent_ += 2;
        }

        size_t i = 0;
        for (const auto& [key, value] : obj) {
            if (pretty_) {
                result += padding();
            }

            result += serialize_string(key);
            result += pretty_ ? ": " : ":";
            result += serialize_value(value);

            if (i + 1 < obj.size()) {
                result += ",";
            }

            if (pretty_) {
                result += "\n";
            }

            ++i;
        }

        if (pretty_) {
            // Back to the enclosing level for the closing brace.
            indent_ -= 2;
            result += padding();
        }

        result += "}";
        return result;
    }
};

まとめ

| トピック | ポイント |
|---|---|
| 再帰下降パーサー | 文法をそのままコードに |
| std::variant | 複数の型を1つの変数で |
| エスケープシーケンス | \n, \uXXXX などの処理 |
| UTF-8 | Unicodeの可変長エンコーディング |

次回はエラー処理とエラーメッセージを改善して、使いやすいパーサーにするよ。

この記事が役に立ったら、いいね・ストックしてもらえると嬉しいです!

7
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
7
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?