implement replacer wildcard and optimize memory usage when loading files

2020-02-16 17:58:09 -07:00 · 2020-02-16 17:58:09 -07:00 · b61272a5e6
commit b61272a5e6
parent b0eeca5b36
7 changed files with 96 additions and 53 deletions
--- a/GUI/host/host.cpp
+++ b/GUI/host/host.cpp
@ -2,7 +2,6 @@
 #include "defs.h"
 #include "util.h"
 #include "../texthook/texthook.h"
 #include <filesystem>
 extern const wchar_t* ALREADY_INJECTED;
 extern const wchar_t* NEED_32_BIT;
--- a/extensions/blockmarkup.h
+++ b/extensions/blockmarkup.h
@ -0,0 +1,55 @@
 #pragma once
 #include "common.h"
 #include <istream>
 /// Incremental parser for pipe-delimited block markup read from a byte stream.
 ///
 /// A block has the shape
 ///   delimiters[0] field0 delimiters[1] field1 ... delimiters[N-1] fieldN-1 |END|
 /// Anything in the stream before delimiters[0] is skipped.
 ///
 /// @tparam C               character type of the markup; each character is assembled
 ///                         from sizeof(C) consecutive bytes of the underlying char stream
 /// @tparam DelimiterCount  number of delimiters (and therefore fields) per block
 /// @tparam bufferStartSize capacity reserved up front for each field buffer
 template <typename C, int DelimiterCount, int bufferStartSize = 200>
 class BlockMarkupIterator
 {
 public:
 	/// @param it          position in the byte stream to start parsing from
 	/// @param delimiters  the delimiters that open each field, in order; only the views
 	///                    are stored, so the viewed characters must outlive this object
 	BlockMarkupIterator(std::istreambuf_iterator<char> it, const std::basic_string_view<C> (&delimiters)[DelimiterCount]) :
 		it(it)
 	{
 		std::copy_n(delimiters, DelimiterCount, this->delimiters.begin());
 	}

 	/// Reads the next block from the stream.
 	/// @return the fields of the block (delimiters stripped), or an empty optional if the
 	///         stream ended before a complete block could be read
 	std::optional<std::array<std::basic_string<C>, DelimiterCount>> Next()
 	{
 		std::array<std::basic_string<C>, DelimiterCount> results;
 		std::basic_string<C> buffer;
 		buffer.reserve(bufferStartSize);
 		// Skip everything up to and including the opening delimiter. If it never shows up
 		// the stream is exhausted, so there is no block left to return.
 		if (!Find(buffer, delimiters[0])) return {};
 		buffer.clear();
 		for (int i = 0; i < DelimiterCount; ++i)
 		{
 			// Field i is terminated by the next delimiter, or by |END| for the last field.
 			const auto delimiter = i + 1 < DelimiterCount ? delimiters[i + 1] : end;
 			if (!Find(buffer, delimiter)) return {};
 			buffer.erase(buffer.size() - delimiter.size());
 			results[i] = std::move(buffer);
 			// The moved-from buffer is in an unspecified state: reset it before reuse.
 			buffer.clear();
 			buffer.reserve(bufferStartSize);
 		}
 		return results;
 	}

 private:
 	/// Appends characters from the stream to result until result ends with delimiter.
 	/// @return true once the delimiter has been read, false if the stream ended first
 	bool Find(std::basic_string<C>& result, std::basic_string_view<C> delimiter)
 	{
 		for (;;)
 		{
 			result.push_back(C{});
 			if (!Read(result.back()))
 			{
 				result.pop_back(); // drop the placeholder that was never filled
 				return false;
 			}
 			// Every delimiter ends with '|', so the suffix is only compared after reading one.
 			if (result.back() != '|' || result.size() < delimiter.size()) continue;
 			if (result.compare(result.size() - delimiter.size(), delimiter.size(), delimiter) == 0) return true;
 		}
 	}

 	/// Reads sizeof(C) bytes from the stream and assembles them into one character.
 	/// @param out receives the character; left untouched if the stream ends early
 	/// @return false if the stream ended before a whole character was available
 	bool Read(C& out)
 	{
 		char bytes[sizeof(C)];
 		for (size_t i = 0; i < sizeof(C); ++i, ++it)
 		{
 			if (it.equal({})) return false;
 			bytes[i] = *it;
 		}
 		// Copy into the object representation of out instead of reinterpreting the byte
 		// array as a C, which would violate the aliasing and alignment rules for wide C.
 		std::copy_n(bytes, sizeof(C), reinterpret_cast<char*>(&out));
 		return true;
 	}

 	static constexpr C endImpl[5] = { '|', 'E', 'N', 'D', '|' };
 	static constexpr std::basic_string_view<C> end{ endImpl, 5 };

 	std::istreambuf_iterator<char> it;
 	std::array<std::basic_string_view<C>, DelimiterCount> delimiters;
 };
--- a/extensions/extrawindow.cpp
+++ b/extensions/extrawindow.cpp
@ -2,8 +2,8 @@
 #include "extension.h"
 #include "ui_extrawindow.h"
 #include "defs.h"
 #include "blockmarkup.h"
 #include <fstream>
 #include <filesystem>
 #include <process.h>
 #include <QColorDialog>
 #include <QFontDialog>
@ -281,32 +281,31 @@ private:
 			catch (std::filesystem::filesystem_error) { return; }
 			dictionary.clear();
-			owningStorage.clear();
+			charStorage.clear();
-			auto StoreCopy = [&](const std::string& string)
+			auto StoreCopy = [&](std::string_view string)
 			{
-				return &*owningStorage.insert(owningStorage.end(), string.c_str(), string.c_str() + string.size() + 1);
+				auto location = &*charStorage.insert(charStorage.end(), string.begin(), string.end());
 				charStorage.push_back(0);
 				return location;
 			};
-			std::string savedDictionary(std::istreambuf_iterator(std::ifstream(DICTIONARY_SAVE_FILE)), {});
+			charStorage.reserve(std::filesystem::file_size(DICTIONARY_SAVE_FILE));
-			owningStorage.reserve(savedDictionary.size());
+			std::ifstream stream(DICTIONARY_SAVE_FILE);
-			for (size_t end = 0; ;)
+			BlockMarkupIterator savedDictionary(stream, Array<std::string_view>{ "|TERM|", "|DEFINITION|" });
 			while (auto read = savedDictionary.Next())
 			{
-				size_t term = savedDictionary.find("|TERM|", end);
+				const auto& [terms, definition] = *read;
-				size_t definition = savedDictionary.find("|DEFINITION|", term);
+				auto storedDefinition = StoreCopy(definition);
-				if ((end = savedDictionary.find("|END|", definition)) == std::string::npos) break;
+				std::string_view termsView = terms;
-				auto storedDefinition = StoreCopy(savedDictionary.substr(definition + 12, end - definition - 12));
+				size_t start = 0, end = termsView.find("|TERM|");
-				for (size_t next; (next = savedDictionary.find("|TERM|", term + 1)) != std::string::npos && next < definition; term = next)
+				while (end != std::string::npos)
-					dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, next - term - 6)), storedDefinition });
+				{
-				dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, definition - term - 6)), storedDefinition });
+					dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start, end - start)), storedDefinition });
-			}
+					start = end + 6;
-			auto oldData = owningStorage.data();
+					end = termsView.find("|TERM|", start);
-			owningStorage.shrink_to_fit();
+				}
-			dictionary.shrink_to_fit();
+				dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start)), storedDefinition });
 			for (auto& [term, definition] : dictionary)
 			{
 				term += owningStorage.data() - oldData;
 				definition += owningStorage.data() - oldData;
 			}
 			std::sort(dictionary.begin(), dictionary.end());
 		}
@ -354,7 +353,7 @@ private:
 		}
 		std::filesystem::file_time_type dictionaryFileLastWrite;
-		std::vector<char> owningStorage;
+		std::vector<char> charStorage;
 		std::vector<QString> definitions;
 		int definitionIndex;
 	} dictionaryWindow;
--- a/extensions/replacer.cpp
+++ b/extensions/replacer.cpp
@ -1,7 +1,8 @@
 #include "extension.h"
 #include "blockmarkup.h"
 #include <cwctype>
 #include <fstream>
-#include <filesystem>
+#include <sstream>
 #include <process.h>
 extern const wchar_t* REPLACER_INSTRUCTIONS;
@ -14,14 +15,16 @@ std::shared_mutex m;
 class Trie
 {
 public:
-	Trie(std::unordered_map<std::wstring, std::wstring> replacements)
+	Trie(const std::istream& replacementScript)
 	{
-		for (const auto& [original, replacement] : replacements)
+		BlockMarkupIterator replacementScriptParser(replacementScript.rdbuf(), Array<std::wstring_view>{ L"|ORIG|", L"|BECOMES|" });
 		while (auto read = replacementScriptParser.Next())
 		{
 			const auto& [original, replacement] = *read;
 			Node* current = &root;
 			for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch);
 			if (current != &root)
-				current->value = owningStorage.insert(owningStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - owningStorage.begin();
+				current->value = charStorage.insert(charStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - charStorage.begin();
 		}
 	}
@ -38,10 +41,10 @@ public:
 			{
 				if (current->value >= 0)
 				{
-					replacement = owningStorage.data() + current->value;
+					replacement = charStorage.data() + current->value;
 					originalLength = j - i;
 				}
-				if (!Ignore(sentence[j])) current = Next(current, sentence[j]);
+				if (!Ignore(sentence[j])) current = Next(current, sentence[j]) ? Next(current, sentence[j]) : Next(current, L'^');
 			}
 			result += replacement;
@ -76,30 +79,16 @@ private:
 		ptrdiff_t value = -1;
 	} root;
-	std::vector<wchar_t> owningStorage;
+	std::vector<wchar_t> charStorage;
-} trie = { {} };
+} trie = { std::istringstream("") };
 /// Extracts every |ORIG|original|BECOMES|replacement|END| command from replacementScript.
 /// Scanning stops at the first command that is missing any of its three markers.
 /// @return map from original text to replacement text; a later command with the same
 ///         original text overwrites an earlier one
 std::unordered_map<std::wstring, std::wstring> Parse(std::wstring_view replacementScript)
 {
 	constexpr std::wstring_view origTag = L"|ORIG|", becomesTag = L"|BECOMES|", endTag = L"|END|";
 	std::unordered_map<std::wstring, std::wstring> parsed;
 	size_t cursor = 0;
 	while (true)
 	{
 		// A failed search yields npos, which makes the following searches fail as well,
 		// so only the last result has to be checked.
 		const size_t origAt = replacementScript.find(origTag, cursor);
 		const size_t becomesAt = replacementScript.find(becomesTag, origAt);
 		cursor = replacementScript.find(endTag, becomesAt);
 		if (cursor == std::wstring_view::npos) return parsed;

 		const size_t keyStart = origAt + origTag.size();
 		const size_t valueStart = becomesAt + becomesTag.size();
 		const std::wstring_view key = replacementScript.substr(keyStart, becomesAt - origAt - origTag.size());
 		const std::wstring_view value = replacementScript.substr(valueStart, cursor - becomesAt - becomesTag.size());
 		parsed.insert_or_assign(std::wstring(key), std::wstring(value));
 	}
 }
 void UpdateReplacements()
 {
 	try
 	{
 		if (replaceFileLastWrite.exchange(std::filesystem::last_write_time(REPLACE_SAVE_FILE)) == std::filesystem::last_write_time(REPLACE_SAVE_FILE)) return;
 		std::vector<BYTE> file(std::istreambuf_iterator(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary)), {});
 		std::scoped_lock l(m);
-		trie = Trie(Parse({ (wchar_t*)file.data(), file.size() / sizeof(wchar_t) }));
+		trie = Trie(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary));
 	}
 	catch (std::filesystem::filesystem_error) { replaceFileLastWrite.store({}); }
 }
@ -138,12 +127,12 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo)
 TEST(
 	{
-		auto replacements = Parse(LR"(
+		std::wstring replacementScript = LR"(
 |ORIG|さよなら|BECOMES|goodbye |END|Ignore this text
 And this text ツ　　
 |ORIG|バカ|BECOMES|idiot|END|
-|ORIG|こんにちは |BECOMES| hello|END||ORIG|delete this|BECOMES||END|)");
+|ORIG|こんにちは |BECOMES| hello|END||ORIG|delet^this|BECOMES||END|)";
-		assert(replacements.size() == 4);
+		Trie replacements(std::istringstream(std::string{ (const char*)replacementScript.c_str(), replacementScript.size() * sizeof(wchar_t) }));
 		std::wstring original = LR"(Don't replace this　
 さよなら バカ こんにちは delete this)";
 		std::wstring replaced = Trie(std::move(replacements)).Replace(original);
--- a/include/common.h
+++ b/include/common.h
@ -17,6 +17,7 @@
 #include <mutex>
 #include <shared_mutex>
 #include <atomic>
 #include <filesystem>
 #include <cstdint>
 #include <cassert>
@ -33,7 +34,7 @@ struct ArrayImpl<T> { using type = T[]; };
 template <typename... Ts>
 using Array = typename ArrayImpl<Ts...>::type;
-template <auto F> using Functor = std::integral_constant<std::decay_t<decltype(F)>, F>;
+template <auto F> using Functor = std::integral_constant<std::remove_reference_t<decltype(F)>, F>;
 template <typename V>
 struct Identity { V operator()(V v) const { return v; } };
--- a/test/main.cpp
+++ b/test/main.cpp
@ -1,7 +1,6 @@
 #include "common.h"
 #include "defs.h"
 #include "resource.h"
 #include <filesystem>
 #include <fstream>
 #include <sstream>
 #include <QApplication>
--- a/text.cpp
+++ b/text.cpp
@ -110,7 +110,7 @@ const char* OUT_OF_RECORDS_RETRY = u8"Textractor: out of search records, please
 const char* FUNC_MISSING = u8"Textractor: function not present";
 const char* MODULE_MISSING = u8"Textractor: module not present";
 const char* GARBAGE_MEMORY = u8"Textractor: memory constantly changing, useless to read";
-const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an incorrect H-code)";
+const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an unstable/incorrect H-code)";
 const char* READ_ERROR = u8"Textractor: Reader ERROR (likely an incorrect R-code)";
 const char* HIJACK_ERROR = u8"Textractor: Hijack ERROR";
 const char* COULD_NOT_FIND = u8"Textractor: could not find text";
@ -174,6 +174,7 @@ const wchar_t* REPLACER_INSTRUCTIONS = LR"(This file only does anything when the
 Replacement commands must be formatted like this:
 |ORIG|original_text|BECOMES|replacement_text|END|
 All text in this file outside of a replacement command is ignored.
 A caret (^) acts as a wildcard that matches any other single character.
 Whitespace in original_text is ignored, but replacement_text can contain spaces, newlines, etc.
 This file must be encoded in Unicode (UTF-16 Little Endian).)";
 const char* THREAD_LINKER = u8"Thread Linker";