From b61272a5e6ed64fb52df88592f9a24f01fa7c2ec Mon Sep 17 00:00:00 2001 From: Akash Mozumdar Date: Sun, 16 Feb 2020 17:58:09 -0700 Subject: [PATCH] implement replacer wildcard and optimize memory usage when loading files --- GUI/host/host.cpp | 1 - extensions/blockmarkup.h | 55 ++++++++++++++++++++++++++++++++++++++ extensions/extrawindow.cpp | 45 +++++++++++++++---------------- extensions/replacer.cpp | 41 +++++++++++----------------- include/common.h | 3 ++- test/main.cpp | 1 - text.cpp | 3 ++- 7 files changed, 96 insertions(+), 53 deletions(-) create mode 100644 extensions/blockmarkup.h diff --git a/GUI/host/host.cpp b/GUI/host/host.cpp index 55ac92b..5609445 100644 --- a/GUI/host/host.cpp +++ b/GUI/host/host.cpp @@ -2,7 +2,6 @@ #include "defs.h" #include "util.h" #include "../texthook/texthook.h" -#include extern const wchar_t* ALREADY_INJECTED; extern const wchar_t* NEED_32_BIT; diff --git a/extensions/blockmarkup.h b/extensions/blockmarkup.h new file mode 100644 index 0000000..f3975d8 --- /dev/null +++ b/extensions/blockmarkup.h @@ -0,0 +1,55 @@ +#pragma once + +#include "common.h" +#include + +template +class BlockMarkupIterator +{ +public: + BlockMarkupIterator(std::istreambuf_iterator it, const std::basic_string_view (&delimiters)[DelimiterCount]) : + it(it) + { + std::copy_n(delimiters, DelimiterCount, this->delimiters.begin()); + } + std::optional, DelimiterCount>> Next() + { + std::array, DelimiterCount> results; + std::basic_string buffer; + buffer.reserve(bufferStartSize); + Find(buffer, delimiters[0]); + buffer.clear(); + for (int i = 0; i < DelimiterCount; ++i) + { + const auto delimiter = i + 1 < DelimiterCount ? delimiters[i + 1] : end; + if (!Find(buffer, delimiter)) return {}; + buffer.erase(buffer.size() - delimiter.size()); + results[i] = std::move(buffer); + (buffer = {}).reserve(bufferStartSize); + } + return results; + } + +private: + bool Find(std::basic_string& result, std::basic_string_view delimiter) + { + while (Read((result += C{}).back())) if (result.back() == '|' && result.find(delimiter, result.size() - delimiter.size()) != std::string::npos) return true; + return false; + } + + bool Read(C& out) + { + BYTE buffer[sizeof(C)]; + for (int i = 0; i < sizeof(C); ++i, ++it) + if (it.equal({})) return false; + else buffer[i] = *it; + out = reinterpret_cast(buffer); + return true; + } + + static constexpr C endImpl[5] = { '|', 'E', 'N', 'D', '|' }; + static constexpr std::basic_string_view end{ endImpl, 5 }; + + std::istreambuf_iterator it; + std::array, DelimiterCount> delimiters; +}; diff --git a/extensions/extrawindow.cpp b/extensions/extrawindow.cpp index 1860351..d76658c 100644 --- a/extensions/extrawindow.cpp +++ b/extensions/extrawindow.cpp @@ -2,8 +2,8 @@ #include "extension.h" #include "ui_extrawindow.h" #include "defs.h" +#include "blockmarkup.h" #include -#include #include #include #include @@ -281,32 +281,31 @@ private: catch (std::filesystem::filesystem_error) { return; } dictionary.clear(); - owningStorage.clear(); + charStorage.clear(); - auto StoreCopy = [&](const std::string& string) + auto StoreCopy = [&](std::string_view string) { - return &*owningStorage.insert(owningStorage.end(), string.c_str(), string.c_str() + string.size() + 1); + auto location = &*charStorage.insert(charStorage.end(), string.begin(), string.end()); + charStorage.push_back(0); + return location; }; - std::string savedDictionary(std::istreambuf_iterator(std::ifstream(DICTIONARY_SAVE_FILE)), {}); - owningStorage.reserve(savedDictionary.size()); - for (size_t end = 0; ;) + charStorage.reserve(std::filesystem::file_size(DICTIONARY_SAVE_FILE)); + std::ifstream stream(DICTIONARY_SAVE_FILE); + BlockMarkupIterator savedDictionary(stream, Array{ "|TERM|", "|DEFINITION|" }); + while (auto read = savedDictionary.Next()) { - size_t term = savedDictionary.find("|TERM|", end); - size_t definition = savedDictionary.find("|DEFINITION|", term); - if ((end = savedDictionary.find("|END|", definition)) == std::string::npos) break; - auto storedDefinition = StoreCopy(savedDictionary.substr(definition + 12, end - definition - 12)); - for (size_t next; (next = savedDictionary.find("|TERM|", term + 1)) != std::string::npos && next < definition; term = next) - dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, next - term - 6)), storedDefinition }); - dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, definition - term - 6)), storedDefinition }); - } - auto oldData = owningStorage.data(); - owningStorage.shrink_to_fit(); - dictionary.shrink_to_fit(); - for (auto& [term, definition] : dictionary) - { - term += owningStorage.data() - oldData; - definition += owningStorage.data() - oldData; + const auto& [terms, definition] = *read; + auto storedDefinition = StoreCopy(definition); + std::string_view termsView = terms; + size_t start = 0, end = termsView.find("|TERM|"); + while (end != std::string::npos) + { + dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start, end - start)), storedDefinition }); + start = end + 6; + end = termsView.find("|TERM|", start); + } + dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start)), storedDefinition }); } std::sort(dictionary.begin(), dictionary.end()); } @@ -354,7 +353,7 @@ private: } std::filesystem::file_time_type dictionaryFileLastWrite; - std::vector owningStorage; + std::vector charStorage; std::vector definitions; int definitionIndex; } dictionaryWindow; diff --git a/extensions/replacer.cpp b/extensions/replacer.cpp index f8d968c..a67aaec 100644 --- a/extensions/replacer.cpp +++ b/extensions/replacer.cpp @@ -1,7 +1,8 @@ #include "extension.h" +#include "blockmarkup.h" #include #include -#include +#include #include extern const wchar_t* REPLACER_INSTRUCTIONS; @@ -14,14 +15,16 @@ std::shared_mutex m; class Trie { public: - Trie(std::unordered_map replacements) + Trie(const std::istream& replacementScript) { - for (const auto& [original, replacement] : replacements) + BlockMarkupIterator replacementScriptParser(replacementScript.rdbuf(), Array{ L"|ORIG|", L"|BECOMES|" }); + while (auto read = replacementScriptParser.Next()) { + const auto& [original, replacement] = *read; Node* current = &root; for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch); if (current != &root) - current->value = owningStorage.insert(owningStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - owningStorage.begin(); + current->value = charStorage.insert(charStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - charStorage.begin(); } } @@ -38,10 +41,10 @@ public: { if (current->value >= 0) { - replacement = owningStorage.data() + current->value; + replacement = charStorage.data() + current->value; originalLength = j - i; } - if (!Ignore(sentence[j])) current = Next(current, sentence[j]); + if (!Ignore(sentence[j])) current = Next(current, sentence[j]) ? Next(current, sentence[j]) : Next(current, L'^'); } result += replacement; @@ -76,30 +79,16 @@ private: ptrdiff_t value = -1; } root; - std::vector owningStorage; -} trie = { {} }; - -std::unordered_map Parse(std::wstring_view replacementScript) -{ - std::unordered_map replacements; - for (size_t end = 0; ;) - { - size_t original = replacementScript.find(L"|ORIG|", end); - size_t becomes = replacementScript.find(L"|BECOMES|", original); - if ((end = replacementScript.find(L"|END|", becomes)) == std::wstring::npos) break; - replacements[std::wstring(replacementScript.substr(original + 6, becomes - original - 6))] = replacementScript.substr(becomes + 9, end - becomes - 9); - } - return replacements; -} + std::vector charStorage; +} trie = { std::istringstream("") }; void UpdateReplacements() { try { if (replaceFileLastWrite.exchange(std::filesystem::last_write_time(REPLACE_SAVE_FILE)) == std::filesystem::last_write_time(REPLACE_SAVE_FILE)) return; - std::vector file(std::istreambuf_iterator(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary)), {}); std::scoped_lock l(m); - trie = Trie(Parse({ (wchar_t*)file.data(), file.size() / sizeof(wchar_t) })); + trie = Trie(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary)); } catch (std::filesystem::filesystem_error) { replaceFileLastWrite.store({}); } } @@ -138,12 +127,12 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo) TEST( { - auto replacements = Parse(LR"( + std::wstring replacementScript = LR"( |ORIG|さよなら|BECOMES|goodbye |END|Ignore this text And this text ツ   |ORIG|バカ|BECOMES|idiot|END| -|ORIG|こんにちは |BECOMES| hello|END||ORIG|delete this|BECOMES||END|)"); - assert(replacements.size() == 4); +|ORIG|こんにちは |BECOMES| hello|END||ORIG|delet^this|BECOMES||END|)"; + Trie replacements(std::istringstream(std::string{ (const char*)replacementScript.c_str(), replacementScript.size() * sizeof(wchar_t) })); std::wstring original = LR"(Don't replace this  さよなら バカ こんにちは delete this)"; std::wstring replaced = Trie(std::move(replacements)).Replace(original); diff --git a/include/common.h b/include/common.h index dd4689e..a303bb2 100644 --- a/include/common.h +++ b/include/common.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -33,7 +34,7 @@ struct ArrayImpl { using type = T[]; }; template using Array = typename ArrayImpl::type; -template using Functor = std::integral_constant, F>; +template using Functor = std::integral_constant, F>; template struct Identity { V operator()(V v) const { return v; } }; diff --git a/test/main.cpp b/test/main.cpp index 3535971..d87682b 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -1,7 +1,6 @@ #include "common.h" #include "defs.h" #include "resource.h" -#include #include #include #include diff --git a/text.cpp b/text.cpp index e08b35f..d37f8a0 100644 --- a/text.cpp +++ b/text.cpp @@ -110,7 +110,7 @@ const char* OUT_OF_RECORDS_RETRY = u8"Textractor: out of search records, please const char* FUNC_MISSING = u8"Textractor: function not present"; const char* MODULE_MISSING = u8"Textractor: module not present"; const char* GARBAGE_MEMORY = u8"Textractor: memory constantly changing, useless to read"; -const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an incorrect H-code)"; +const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an unstable/incorrect H-code)"; const char* READ_ERROR = u8"Textractor: Reader ERROR (likely an incorrect R-code)"; const char* HIJACK_ERROR = u8"Textractor: Hijack ERROR"; const char* COULD_NOT_FIND = u8"Textractor: could not find text"; @@ -174,6 +174,7 @@ const wchar_t* REPLACER_INSTRUCTIONS = LR"(This file only does anything when the Replacement commands must be formatted like this: |ORIG|original_text|BECOMES|replacement_text|END| All text in this file outside of a replacement command is ignored. +A caret (^) acts as a wildcard that matches any other single character. Whitespace in original_text is ignored, but replacement_text can contain spaces, newlines, etc. This file must be encoded in Unicode (UTF-16 Little Endian).)"; const char* THREAD_LINKER = u8"Thread Linker";