improve JSON parser

2024-12-23 08:54:12 +08:00 · 2020-12-15 07:28:12 -07:00 · 2020-12-15 07:28:12 -07:00 · 2c9ac1da3c
commit 2c9ac1da3c
parent 95b145bece
3 changed files with 35 additions and 42 deletions
--- a/extensions/extrawindow.cpp
+++ b/extensions/extrawindow.cpp
@@ -197,7 +197,7 @@ public:
 	void AddSentence(QString sentence)
 	{
 		if (sentence.size() > maxSentenceSize) sentence = SENTENCE_TOO_BIG;
-		if (!showOriginal && sentence.count(u8"\x200b \n")) sentence = sentence.split(u8"\x200b \n")[1];
+		if (!showOriginal && sentence.contains(u8"\x200b \n")) sentence = sentence.split(u8"\x200b \n")[1];
 		sanitize(sentence);
 		sentence.chop(std::distance(std::remove(sentence.begin(), sentence.end(), QChar::Tabulation), sentence.end()));
 		sentenceHistory.push_back(sentence);
--- a/extensions/network.cpp
+++ b/extensions/network.cpp
@@ -61,4 +61,4 @@ std::string Escape(const std::string& text)
 	return escaped;
 }

-//TEST(assert(JSON::Parse(LR"([{"string":"hello world","boolean":false,"number": 1.67e+4,"null": null,"array":[]},"hello world"])")))
+TEST(assert(JSON::Parse<wchar_t>(LR"([{"string":"hello world","boolean":false,"number":1.67e+4,"null":null,"array":[]},"hello world"])")))
--- a/extensions/network.h
+++ b/extensions/network.h
@@ -62,38 +62,6 @@ namespace JSON
 		inline static std::wstring FromCodepoint(int codepoint) { return { (wchar_t)codepoint }; } // TODO: surrogate pairs
 	};

-	template <typename C>
-	std::pair<std::basic_string<C>, int> Unescape(std::basic_string_view<C> text)
-	{
-		std::basic_string<C> unescaped;
-		int i = 0;
-		for (; i < text.size(); ++i)
-		{
-			auto ch = text[i];
-			if (ch == '"') return { unescaped, i + 1 };
-			if (ch == '\\')
-			{
-				ch = text[i + 1];
-				if (ch == 'u' && isxdigit(text[i + 2]) && isxdigit(text[i + 3]) && isxdigit(text[i + 4]) && isxdigit(text[i + 5]))
-				{
-					char charCode[] = { text[i + 2], text[i + 3], text[i + 4], text[i + 5], 0 };
-					unescaped += UTF<C>::FromCodepoint(strtol(charCode, nullptr, 16));
-					i += 5;
-					continue;
-				}
-				for (auto [original, value] : Array<char, char>{ { 'b', '\b' }, {'f', '\f'}, {'n', '\n'}, {'r', '\r'}, {'t', '\t'} }) if (ch == original)
-				{
-					unescaped.push_back(value);
-					goto replaced;
-				}
-				unescaped.push_back(ch);
-				replaced: i += 1;
-			}
-			else unescaped.push_back(ch);
-		}
-		return { unescaped, i };
-	}
-
 	template <typename C>
 	struct Value : private std::variant<std::monostate, std::nullopt_t, bool, double, std::basic_string<C>, std::vector<Value<C>>, std::unordered_map<std::basic_string<C>, Value<C>>>
 	{
@@ -102,6 +70,7 @@ namespace JSON
 		explicit operator bool() const { return index(); }
 		bool IsNull() const { return index() == 1; }
 		auto Boolean() const { return std::get_if<bool>(this); }
+		auto Number() const { return std::get_if<double>(this); }
 		auto String() const { return std::get_if<std::basic_string<C>>(this); }
 		auto Array() const { return std::get_if<std::vector<Value<C>>>(this); }
 		auto Object() const { return std::get_if<std::unordered_map<std::basic_string<C>, Value<C>>>(this); }
@@ -121,7 +90,7 @@ namespace JSON
 	};
 	
 	template <typename C, int maxDepth = 25>
-	Value<C> Parse(std::basic_string_view<C> text, int64_t& i, int depth)
+	Value<C> Parse(const std::basic_string<C>& text, int64_t& i, int depth)
 	{
 		if (depth > maxDepth) return {};
 		C ch;
@@ -134,10 +103,33 @@ namespace JSON
 		};
 		auto ExtractString = [&]
 		{
+			std::basic_string<C> unescaped;
 			i += 1;
-			auto [string, length] = Unescape(text.substr(i));
-			i += length;
-			return string;
+			for (; i < text.size(); ++i)
+			{
+				auto ch = text[i];
+				if (ch == '"') return i += 1, unescaped;
+				if (ch == '\\')
+				{
+					ch = text[i + 1];
+					if (ch == 'u' && isxdigit(text[i + 2]) && isxdigit(text[i + 3]) && isxdigit(text[i + 4]) && isxdigit(text[i + 5]))
+					{
+						char charCode[] = { text[i + 2], text[i + 3], text[i + 4], text[i + 5], 0 };
+						unescaped += UTF<C>::FromCodepoint(strtol(charCode, nullptr, 16));
+						i += 5;
+						continue;
+					}
+					for (auto [original, value] : Array<char, char>{ { 'b', '\b' }, {'f', '\f'}, {'n', '\n'}, {'r', '\r'}, {'t', '\t'} }) if (ch == original)
+					{
+						unescaped.push_back(value);
+						goto replaced;
+					}
+					unescaped.push_back(ch);
+					replaced: i += 1;
+				}
+				else unescaped.push_back(ch);
+			}
+			return unescaped;
 		};

 		if (SkipWhitespace()) return {};
@@ -155,9 +147,10 @@ namespace JSON

 		if (ch == '-' || (ch >= '0' && ch <= '9'))
 		{
-			// no numbers currently used, add an actual parser when needed
-			while (i < text.size() && ((text[i] >= '0' && text[i] <= '9') || text[i] == '-' || text[i] == '+' || text[i] == 'e' || text[i] == 'E' || text[i] == '.')) ++i;
-			return 0.0;
+			std::string number;
+			for (; i < text.size() && ((text[i] >= '0' && text[i] <= '9') || text[i] == '-' || text[i] == '+' || text[i] == 'e' || text[i] == 'E' || text[i] == '.'); ++i)
+				number.push_back(text[i]);
+			return strtod(number.c_str(), NULL);
 		}

 		if (ch == '"') return ExtractString();
@@ -203,6 +196,6 @@ namespace JSON
 	Value<C> Parse(const std::basic_string<C>& text)
 	{
 		int64_t start = 0;
-		return Parse((std::basic_string_view<C>)text, start, 0);
+		return Parse(text, start, 0);
 	}
 }