Contents (click to collapse) >>

json parser

This is an exercise in parsing a JSON string with C++.

The standard is from https://www.json.org

State machine

We usually use a state machine to solve string-parsing problems.

json parser

Implementation in c++

If we can prettify a JSON string, we have already parsed it.

```
// States of the character-driven state machine used by prettify_json.
enum JsonState {
    BEGIN,                // initial state: skips whitespace until '{' or '['
    OBJECT,               // entered after '{': expects a key's opening '"' or '}'
    KEY,                  // reading a quoted object key
    KEY_VALUE_SEPARATOR,  // key finished: expects ':'
    VALUE,                // expects the first character of a value
    END_VALUE,            // a value just ended: expects ',', '}' or ']'
    END_OBJECT,           // '}' was consumed: the enclosing context is popped

    ARRAY,                // entered after '[': expects the first element or ']'
    END_ARRAY,            // ']' was consumed: the enclosing context is popped

    COMMA,                // ',' was consumed: next state depends on the enclosing container

    NUMBER,               // inside a number literal
    STRING,               // inside a string value
    BOOLEAN,              // inside "true" / "false"
    NULL_VALUE,           // inside "null"

    END,                  // outermost container closed: only whitespace is accepted
};

// Printable name of every JsonState, intended for trace/debug output
// (e.g. `state_map[state]`). Every enumerator must have an entry here:
// looking up a missing one with operator[] would silently insert an
// empty name.
std::map<JsonState, std::string> state_map = {
    {JsonState::BEGIN, "BEGIN"},
    {JsonState::OBJECT, "OBJECT"},
    {JsonState::KEY, "KEY"},
    {JsonState::KEY_VALUE_SEPARATOR, "KEY_VALUE_SEPARATOR"},
    {JsonState::VALUE, "VALUE"},
    {JsonState::END_VALUE, "END_VALUE"},
    {JsonState::END_OBJECT, "END_OBJECT"},

    {JsonState::ARRAY, "ARRAY"},
    {JsonState::END_ARRAY, "END_ARRAY"},

    {JsonState::COMMA, "COMMA"},

    {JsonState::NUMBER, "NUMBER"},
    {JsonState::STRING, "STRING"},
    {JsonState::BOOLEAN, "BOOLEAN"},
    {JsonState::NULL_VALUE, "NULL_VALUE"},

    {JsonState::END, "END"},
};

int prettify_json(const std::string& json, int indent_size, std::string* out) {
    out->clear();
    std::stringstream ss;
    char last_char = '\0';
    JsonState state = JsonState::BEGIN;
    int indent = 0;
    auto indents = [&indent_size](int indent)-> std::string {
            std::string str;
            indent = indent * indent_size;
            while (indent--) {
                str += " ";
            }
            return str;
    };
    std::string white_chars = " \t\n\r";
    auto is_white_char = [&white_chars](char c)-> bool {
            return white_chars.find(c) != std::string::npos;
    };

    std::string number;
    std::string boolean;
    std::string string;
    std::string null;

    std::string key;

    std::stack<JsonState> container_ctx;

    char c = '\0';
    for (size_t i = 0; i < json.size();) {
        if (i > 0) {
            last_char = json[i - 1];
        }
        c = json[i];
        switch (state) {
            case JsonState::BEGIN: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                if (c == '{') {
                    state = JsonState::OBJECT;
                    ss << c << std::endl << indents(++indent);
                } else if (c == '[') {
                    state = JsonState::ARRAY;
                    ss << c << std::endl << indents(++indent);
                } else if (!is_white_char(c)) {
                    std::cout << "malformat json" << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            case JsonState::OBJECT: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                container_ctx.push(JsonState::OBJECT);
                if (c == '"') {
                    state = JsonState::KEY;
                    continue; // keep " to next state
                } else if (c == '}') {
                    state = JsonState::END_OBJECT;
                    if (container_ctx.top() != JsonState::OBJECT) {
                        std::cout << "=xxxxxxxxxxxxxx" << std::endl;
                    }
                    ss << std::endl << indents(indent--) << c;
                } else if (!is_white_char(c)) {
                    std::cerr << "malformat json" << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            case JsonState::KEY: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                if (key.size() == 0) {
                    if (c == '"') {
                        key = '"';
                        ss << c;
                    } else if (!is_white_char(c)) {
                        std::cerr << "malformat json at: " << i << std::endl;
                        return i;
                    }
                } else {
                    ss << c;
                    key += c;
                    if (c == '"' && last_char != '\\') {
                        key.clear();
                        state = JsonState::KEY_VALUE_SEPARATOR;
                    }
                }
                ++i;
                break;
            }
            case JsonState::KEY_VALUE_SEPARATOR: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                if (c == ':') {
                    ss << c << ' '; // separator
                    state = JsonState::VALUE;
                } else if (!is_white_char(c)) {
                    std::cerr << "malformat json at: " << i << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            case JsonState::VALUE: { // value may be object, array, number, string or boolean
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                if (c == '{') {
                    state = JsonState::OBJECT;
                    ss << c << std::endl << indents(++indent);
                } else if (c == '[') {
                    state = JsonState::ARRAY;
                    ss << c << std::endl << indents(++indent);
                } else if (c == '"') {
                    state = JsonState::STRING;
                    string = c;
                    ss << c;
                } else if (c == '-' || (c >= '0' && c <= '9')) {
                    state = JsonState::NUMBER;
                    continue; // keep c to next state
                } else if (c == 't' || c == 'f') {
                    state = JsonState::BOOLEAN;
                    boolean = c;
                    ss << c;
                } else if (c == 'n') {
                    state = JsonState::NULL_VALUE;
                    null = c;
                    ss << c;
                } else if (!is_white_char(c)) {
                    std::cerr << "malformat json" << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            case JsonState::COMMA: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                if (container_ctx.top() == JsonState::OBJECT) { // separator between pair
                    state = JsonState::KEY;
                } if (container_ctx.top() == JsonState::ARRAY) {
                    state = JsonState::VALUE;
                }
                break;
            }
            case JsonState::STRING: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                if (c == '"' && last_char != '\\') {
                    ss << c;
                    string.clear();
                    state = JsonState::END_VALUE;
                } else {
                    ss << c;
                    string += c;
                }
                ++i;
                break;
            }
            case JsonState::NUMBER: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                if ((c <= '9' && c >= '0') || c == 'e' || c == '.' || c == '-') {
                    number += c;
                    ss << c;
                    if (number.rfind('.') != number.find('.')
                            || number.rfind('-') != number.find('-')
                            || number.rfind('e') != number.find('e')) {
                        std::cerr << "malformat json, at: " << i << std::endl;
                        return i;
                    }
                } else if (is_white_char(c) || c == ']' || c == '}' || c == ',') {
                    number.clear();
                    state = JsonState::END_VALUE;
                    continue;
                } else {
                    std::cerr << "malformat json, at: " << i << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            case JsonState::BOOLEAN: {
                if (c >= 'a' && c <= 'z') {
                    boolean += c;
                    ss << c;
                } else if (is_white_char(c) || c == '}' || c == ']' || c == ',') {
                    if (boolean != "true" && boolean != "false") {
                        std::cerr << "malformat json, at: " << i << std::endl;
                        return i;
                    }
                    boolean.clear();
                    state = JsonState::END_VALUE;
                    continue;
                } else {
                    std::cerr << "malformat json, at: " << i << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            case JsonState::NULL_VALUE: {
                if (c >= 'a' && c <= 'z') {
                    null += c;
                    ss << c;
                } else if (is_white_char(c) || c == '}' || c == ']' || c == ',') {
                    if (null != "null") {
                        std::cerr << "malformat json, at: " << i << std::endl;
                        return i;
                    }
                    boolean.clear();
                    state = JsonState::END_VALUE;
                    continue;
                } else {
                    std::cerr << "malformat json, at: " << i << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            case JsonState::END_VALUE: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                if (c == ',') {
                    ss << c << std::endl << indents(indent);
                    state = JsonState::COMMA;
                } else if (c == '}') { // state not changed
                    state = JsonState::END_OBJECT;
                    if (container_ctx.top() != JsonState::OBJECT) {
                        std::cout << "=xxxxxxxxxxxxxx" << std::endl;
                    }
                    ss << std::endl << indents(--indent) << c;
                } else if (c == ']') { // state not changed
                    state = JsonState::END_ARRAY;
                    if (container_ctx.top() != JsonState::ARRAY) {
                        std::cout << "=xxxxxxxxxxxxxx" << std::endl;
                    }
                    ss << std::endl << indents(--indent) << c;
                } else if (!is_white_char(c)) {
                    std::cerr << "malformat json" << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            case JsonState::ARRAY: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                container_ctx.push(JsonState::ARRAY);
                if (c == '{') {
                    state = JsonState::OBJECT;
                    ss << c << std::endl << indents(++indent);
                } else if (c == '[') {
                    state = JsonState::ARRAY;
                    ss << c << std::endl << indents(++indent);
                } else if (c == '"') {
                    state = JsonState::STRING;
                    ss << c;
                } else if (c == '-' || (c >= '0' && c <= '9')) {
                    state = JsonState::NUMBER;
                    ss << c;
                } else if (c == 't' || c == 'f') {
                    state = JsonState::BOOLEAN;
                    ss << c;
                } else if (c == 'n') {
                    state = JsonState::NULL_VALUE;
                    ss << c;
                } else if (c == ']') {
                    state = JsonState::END_ARRAY;
                    if (container_ctx.top() != JsonState::ARRAY) {
                        std::cout << "=xxxxxxxxxxxxxx" << std::endl;
                    }
                    ss << indents(--indent) << std::endl << c;
                } else if (!is_white_char(c)) {
                    std::cerr << "malformat json" << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            case JsonState::END_ARRAY:
            case JsonState::END_OBJECT: {
//                 std::cout << "state: " << state_map[state] << ", char: " << c /*<< " -> "*/ << std::endl;
                container_ctx.pop();
                state = JsonState::END_VALUE;
                if (container_ctx.size() == 0) {
                    state = JsonState::END;
                }
                break;
            }
            case JsonState::END: {
                if (!is_white_char(c)) {
                    std::cerr << "malformat json, at: " << i << std::endl;
                    return i;
                }
                ++i;
                break;
            }
            default: {
                ++i;
                break;
            }
        }
    }
    ss << std::endl;
    out->assign(ss.str());
    return 0;
}
```
comments powered by Disqus

从 shared_from_this() 谈智能指针 weak_ptr 和 shared_ptr 的实现

一般来说c++ `shared_ptr` 实现逻辑上基本上都是一个ptr加上一个control block来实现,control block 用于保存引用计数以及如何回收(deleter)等信息,有一些实现(gcc)会将ptr放到control block里,有的(llvm...… Continue reading

braft call graph

Published on September 15, 2019

Clock And Timestamp

Published on August 16, 2019