From 2c9ac1da3c6e1ca7b40375b97c4a91565c7edf35 Mon Sep 17 00:00:00 2001 From: Akash Mozumdar Date: Tue, 15 Dec 2020 07:28:12 -0700 Subject: [PATCH] improve JSON parser --- extensions/extrawindow.cpp | 2 +- extensions/network.cpp | 2 +- extensions/network.h | 73 +++++++++++++++++--------------------- 3 files changed, 35 insertions(+), 42 deletions(-) diff --git a/extensions/extrawindow.cpp b/extensions/extrawindow.cpp index dd57edd..a602595 100644 --- a/extensions/extrawindow.cpp +++ b/extensions/extrawindow.cpp @@ -197,7 +197,7 @@ public: void AddSentence(QString sentence) { if (sentence.size() > maxSentenceSize) sentence = SENTENCE_TOO_BIG; - if (!showOriginal && sentence.count(u8"\x200b \n")) sentence = sentence.split(u8"\x200b \n")[1]; + if (!showOriginal && sentence.contains(u8"\x200b \n")) sentence = sentence.split(u8"\x200b \n")[1]; sanitize(sentence); sentence.chop(std::distance(std::remove(sentence.begin(), sentence.end(), QChar::Tabulation), sentence.end())); sentenceHistory.push_back(sentence); diff --git a/extensions/network.cpp b/extensions/network.cpp index 24fff12..454fe2c 100644 --- a/extensions/network.cpp +++ b/extensions/network.cpp @@ -61,4 +61,4 @@ std::string Escape(const std::string& text) return escaped; } -//TEST(assert(JSON::Parse(LR"([{"string":"hello world","boolean":false,"number": 1.67e+4,"null": null,"array":[]},"hello world"])"))) +TEST(assert(JSON::Parse(LR"([{"string":"hello world","boolean":false,"number":1.67e+4,"null":null,"array":[]},"hello world"])"))) diff --git a/extensions/network.h b/extensions/network.h index d2610ca..bc9dfcc 100644 --- a/extensions/network.h +++ b/extensions/network.h @@ -62,38 +62,6 @@ namespace JSON inline static std::wstring FromCodepoint(int codepoint) { return { (wchar_t)codepoint }; } // TODO: surrogate pairs }; - template - std::pair, int> Unescape(std::basic_string_view text) - { - std::basic_string unescaped; - int i = 0; - for (; i < text.size(); ++i) - { - auto ch = text[i]; - if (ch == '"') return { unescaped, i + 1 }; - if (ch == '\\') - { - ch = text[i + 1]; - if (ch == 'u' && isxdigit(text[i + 2]) && isxdigit(text[i + 3]) && isxdigit(text[i + 4]) && isxdigit(text[i + 5])) - { - char charCode[] = { text[i + 2], text[i + 3], text[i + 4], text[i + 5], 0 }; - unescaped += UTF::FromCodepoint(strtol(charCode, nullptr, 16)); - i += 5; - continue; - } - for (auto [original, value] : Array{ { 'b', '\b' }, {'f', '\f'}, {'n', '\n'}, {'r', '\r'}, {'t', '\t'} }) if (ch == original) - { - unescaped.push_back(value); - goto replaced; - } - unescaped.push_back(ch); - replaced: i += 1; - } - else unescaped.push_back(ch); - } - return { unescaped, i }; - } - template struct Value : private std::variant, std::vector>, std::unordered_map, Value>> { @@ -102,6 +70,7 @@ namespace JSON explicit operator bool() const { return index(); } bool IsNull() const { return index() == 1; } auto Boolean() const { return std::get_if(this); } + auto Number() const { return std::get_if(this); } auto String() const { return std::get_if>(this); } auto Array() const { return std::get_if>>(this); } auto Object() const { return std::get_if, Value>>(this); } @@ -121,7 +90,7 @@ namespace JSON }; template - Value Parse(std::basic_string_view text, int64_t& i, int depth) + Value Parse(const std::basic_string& text, int64_t& i, int depth) { if (depth > maxDepth) return {}; C ch; @@ -134,10 +103,33 @@ namespace JSON }; auto ExtractString = [&] { + std::basic_string unescaped; i += 1; - auto [string, length] = Unescape(text.substr(i)); - i += length; - return string; + for (; i < text.size(); ++i) + { + auto ch = text[i]; + if (ch == '"') return i += 1, unescaped; + if (ch == '\\') + { + ch = text[i + 1]; + if (ch == 'u' && isxdigit(text[i + 2]) && isxdigit(text[i + 3]) && isxdigit(text[i + 4]) && isxdigit(text[i + 5])) + { + char charCode[] = { text[i + 2], text[i + 3], text[i + 4], text[i + 5], 0 }; + unescaped += UTF::FromCodepoint(strtol(charCode, nullptr, 16)); + i += 5; + continue; + } + for (auto [original, value] : Array{ { 'b', '\b' }, {'f', '\f'}, {'n', '\n'}, {'r', '\r'}, {'t', '\t'} }) if (ch == original) + { + unescaped.push_back(value); + goto replaced; + } + unescaped.push_back(ch); + replaced: i += 1; + } + else unescaped.push_back(ch); + } + return unescaped; }; if (SkipWhitespace()) return {}; @@ -155,9 +147,10 @@ namespace JSON if (ch == '-' || (ch >= '0' && ch <= '9')) { - // no numbers currently used, add an actual parser when needed - while (i < text.size() && ((text[i] >= '0' && text[i] <= '9') || text[i] == '-' || text[i] == '+' || text[i] == 'e' || text[i] == 'E' || text[i] == '.')) ++i; - return 0.0; + std::string number; + for (; i < text.size() && ((text[i] >= '0' && text[i] <= '9') || text[i] == '-' || text[i] == '+' || text[i] == 'e' || text[i] == 'E' || text[i] == '.'); ++i) + number.push_back(text[i]); + return strtod(number.c_str(), NULL); } if (ch == '"') return ExtractString(); @@ -203,6 +196,6 @@ namespace JSON Value Parse(const std::basic_string& text) { int64_t start = 0; - return Parse((std::basic_string_view)text, start, 0); + return Parse(text, start, 0); } }