From f890789a3b4db255293d591ae0fe09306dd30da0 Mon Sep 17 00:00:00 2001 From: Akash Mozumdar Date: Thu, 13 Feb 2020 02:16:21 -0700 Subject: [PATCH] add some classes --- extensions/blockmarkuplanguage.h | 10 +++ extensions/charstorage.h | 31 ++++++++ extensions/extrawindow.cpp | 17 ++--- extensions/replacer.cpp | 62 +++++++--------- extensions/trie.h | 121 +++++++++++++++++++++++++++++++ 5 files changed, 193 insertions(+), 48 deletions(-) create mode 100644 extensions/blockmarkuplanguage.h create mode 100644 extensions/charstorage.h create mode 100644 extensions/trie.h diff --git a/extensions/blockmarkuplanguage.h b/extensions/blockmarkuplanguage.h new file mode 100644 index 0000000..301da40 --- /dev/null +++ b/extensions/blockmarkuplanguage.h @@ -0,0 +1,10 @@ +#pragma once + +#include "common.h" +#include + +template +class BlockMarkupLanguageIterator +{ + std::istreambuf_iterator it; +}; diff --git a/extensions/charstorage.h b/extensions/charstorage.h new file mode 100644 index 0000000..3a17104 --- /dev/null +++ b/extensions/charstorage.h @@ -0,0 +1,31 @@ +#pragma once + +#include "common.h" + +template +class CharStorage +{ +public: + CharStorage(size_t capacity = 0) + { + storage.reserve(capacity); + } + + int Store(const std::basic_string& string) + { + return storage.insert(storage.end(), string.c_str(), string.c_str() + string.size() + 1) - storage.begin(); + } + + void FreeExcess() + { + storage.shrink_to_fit(); + } + + const C* Retrieve(int handle) const + { + return storage.data() + handle; + } + +private: + std::vector storage; +}; \ No newline at end of file diff --git a/extensions/extrawindow.cpp b/extensions/extrawindow.cpp index 1860351..34b9103 100644 --- a/extensions/extrawindow.cpp +++ b/extensions/extrawindow.cpp @@ -2,6 +2,7 @@ #include "extension.h" #include "ui_extrawindow.h" #include "defs.h" +#include "blockmarkuplanguage.h" #include #include #include @@ -281,15 +282,15 @@ private: catch (std::filesystem::filesystem_error) { return; } 
dictionary.clear(); - owningStorage.clear(); + definitionStorage.clear(); auto StoreCopy = [&](const std::string& string) { - return &*owningStorage.insert(owningStorage.end(), string.c_str(), string.c_str() + string.size() + 1); + return &*definitionStorage.insert(definitionStorage.end(), string.c_str(), string.c_str() + string.size() + 1); }; std::string savedDictionary(std::istreambuf_iterator(std::ifstream(DICTIONARY_SAVE_FILE)), {}); - owningStorage.reserve(savedDictionary.size()); + definitionStorage.reserve(savedDictionary.size()); for (size_t end = 0; ;) { size_t term = savedDictionary.find("|TERM|", end); @@ -300,14 +301,6 @@ private: dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, next - term - 6)), storedDefinition }); dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, definition - term - 6)), storedDefinition }); } - auto oldData = owningStorage.data(); - owningStorage.shrink_to_fit(); - dictionary.shrink_to_fit(); - for (auto& [term, definition] : dictionary) - { - term += owningStorage.data() - oldData; - definition += owningStorage.data() - oldData; - } std::sort(dictionary.begin(), dictionary.end()); } @@ -354,7 +347,7 @@ private: } std::filesystem::file_time_type dictionaryFileLastWrite; - std::vector owningStorage; + std::vector definitionStorage; std::vector definitions; int definitionIndex; } dictionaryWindow; diff --git a/extensions/replacer.cpp b/extensions/replacer.cpp index f8d968c..50a298c 100644 --- a/extensions/replacer.cpp +++ b/extensions/replacer.cpp @@ -1,4 +1,6 @@ #include "extension.h" +#include "trie.h" +#include "charstorage.h" #include #include #include @@ -11,18 +13,14 @@ constexpr auto REPLACE_SAVE_FILE = u8"SavedReplacements.txt"; std::atomic replaceFileLastWrite = {}; std::shared_mutex m; -class Trie +class ReplacementTrie { public: - Trie(std::unordered_map replacements) + ReplacementTrie(std::vector> replacements) { - for (const auto& [original, replacement] : replacements) - { - Node* 
current = &root; - for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch); - if (current != &root) - current->value = owningStorage.insert(owningStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - owningStorage.begin(); - } + for (auto& [original, replacement] : replacements) + if (!original.empty()) + trie.Insert(std::wstring_view(original.c_str(), std::remove_if(original.begin(), original.end(), Ignore) - original.begin()))->SetValue(storage.Store(replacement)); } std::wstring Replace(const std::wstring& sentence) const @@ -33,17 +31,23 @@ public: std::wstring_view replacement(sentence.c_str() + i, 1); int originalLength = 1; - const Node* current = &root; + auto current = trie.Root(); for (int j = i; current && j <= sentence.size(); ++j) { - if (current->value >= 0) + if (const wchar_t* tail = current->Tail()) + for (; j <= sentence.size() && *tail; ++j) + if (Ignore(sentence[j])); + else if (sentence[j] == *tail) ++tail; + else goto doneSearchingTrie; + if (int* value = current->Value()) { - replacement = owningStorage.data() + current->value; + replacement = storage.Retrieve(*value); originalLength = j - i; } - if (!Ignore(sentence[j])) current = Next(current, sentence[j]); + if (!Ignore(sentence[j])) current = trie.Next(current, sentence[j]); } - + + doneSearchingTrie: result += replacement; i += originalLength; } @@ -52,7 +56,7 @@ public: bool Empty() { - return root.charMap.empty(); + return trie.Root()->charMap.empty(); } private: @@ -61,33 +65,19 @@ private: return ch <= 0x20 || std::iswspace(ch); } - template - static Node* Next(Node* node, wchar_t ch) - { - auto it = std::lower_bound(node->charMap.begin(), node->charMap.end(), ch, [](const auto& one, auto two) { return one.first < two; }); - if (it != node->charMap.end() && it->first == ch) return it->second.get(); - if constexpr (!std::is_const_v) return node->charMap.insert(it, { ch, std::make_unique() })->second.get(); - return nullptr; - } - - struct 
Node - { - std::vector>> charMap; - ptrdiff_t value = -1; - } root; - - std::vector owningStorage; + CharStorage storage; + Trie trie; } trie = { {} }; -std::unordered_map Parse(std::wstring_view replacementScript) +std::vector> Parse(std::wstring_view replacementScript) { - std::unordered_map replacements; + std::vector> replacements; for (size_t end = 0; ;) { size_t original = replacementScript.find(L"|ORIG|", end); size_t becomes = replacementScript.find(L"|BECOMES|", original); if ((end = replacementScript.find(L"|END|", becomes)) == std::wstring::npos) break; - replacements[std::wstring(replacementScript.substr(original + 6, becomes - original - 6))] = replacementScript.substr(becomes + 9, end - becomes - 9); + replacements.emplace_back(replacementScript.substr(original + 6, becomes - original - 6), replacementScript.substr(becomes + 9, end - becomes - 9)); } return replacements; } @@ -99,7 +89,7 @@ void UpdateReplacements() if (replaceFileLastWrite.exchange(std::filesystem::last_write_time(REPLACE_SAVE_FILE)) == std::filesystem::last_write_time(REPLACE_SAVE_FILE)) return; std::vector file(std::istreambuf_iterator(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary)), {}); std::scoped_lock l(m); - trie = Trie(Parse({ (wchar_t*)file.data(), file.size() / sizeof(wchar_t) })); + trie = ReplacementTrie(Parse({ (wchar_t*)file.data(), file.size() / sizeof(wchar_t) })); } catch (std::filesystem::filesystem_error) { replaceFileLastWrite.store({}); } } @@ -146,7 +136,7 @@ And this text ツ   assert(replacements.size() == 4); std::wstring original = LR"(Don't replace this  さよなら バカ こんにちは delete this)"; - std::wstring replaced = Trie(std::move(replacements)).Replace(original); + std::wstring replaced = ReplacementTrie(std::move(replacements)).Replace(original); assert(replaced == L"Don't replace thisgoodbye idiot hello"); } ); diff --git a/extensions/trie.h b/extensions/trie.h new file mode 100644 index 0000000..4729912 --- /dev/null +++ b/extensions/trie.h @@ -0,0 +1,121 @@ 
+#pragma once + +#include "common.h" +#include + +template +struct Trie +{ + struct Node + { + union + { + std::basic_string chars; + std::vector>> charMap; + }; + uint64_t packedValue; + const C* Tail() const + { + return packedValue >> 63 ? chars.c_str() : nullptr; + } + V* Value() const + { + return (V*)((packedValue << 2) >> 2); + } + void SetValue(V value) + { + if (V* oldValue = Value()) *oldValue = std::move(value); + else packedValue = (1LL << (62 + (packedValue >> 63))) | (uint64_t)new V(std::move(value)); + } + Node(bool map) : + packedValue(1LL << (62 + !map)) + { + if (map) new (&charMap) decltype(charMap)(); + else new (&chars) decltype(chars)(); + } + ~Node() + { + if (packedValue >> 63) chars.~basic_string(); + else charMap.~vector(); + delete Value(); + } + }; + + template + static Node* Next(Node* node, C ch, bool makeMap = false) + { + if (node->packedValue >> 63) return nullptr; + auto it = std::lower_bound(node->charMap.begin(), node->charMap.end(), ch, [](const auto& one, auto two) { return one.first < two; }); + if (it != node->charMap.end() && it->first == ch) return it->second.get(); + if constexpr (!std::is_const_v) return node->charMap.insert(it, { ch, std::make_unique(makeMap) })->second.get(); + return nullptr; + } + + static void Clear(Node* node) + { + if (node->packedValue >> 63) node->chars.clear(); + else for (auto& [_, child] : node->charMap) Clear(child.get()); + } + + std::unique_ptr root = std::make_unique(true); + + Node* Insert(std::basic_string_view key) + { + Node* current = root.get(); + for (int i = 0; i < key.size(); ++i) + { + if (Node* next = Next(current, key[i], i + 1 == key.size())) current = next; + else + { + if (current->chars.empty()) // FIXME: how to represent last character inside map? 
+ { + current->chars = std::basic_string(key.begin() + i, key.end()); + if(current->chars.empty())throw; + break; + } + else if (current->chars == key.substr(i)) + { + break; + } + else + { + auto oldChars = std::move(current->chars); + assert(current->Value()); + auto oldValue = std::move(*current->Value()); + auto keyRemaining = key.substr(i); + current->chars.~basic_string(); + new (&current->charMap) decltype(current->charMap)(); + current->packedValue ^= 3ULL << 62; + for (i = 0; i < oldChars.size() && i < keyRemaining.size(); ++i) + { + if (oldChars[i] == keyRemaining[i]) current = Next(current, oldChars[i], true); + else break; + } + if (i == oldChars.size()) + { + current->SetValue(std::move(oldValue)); + } + else + { + auto relocated = Next(current, oldChars[i]); + relocated->chars = oldChars.substr(i); + relocated->SetValue(std::move(oldValue)); + } + if (i != keyRemaining.size()) (current = Next(current, keyRemaining[i]))->chars = std::basic_string(keyRemaining.begin() + i, keyRemaining.end()); + break; + } + } + } + return current; + } + + const Node* Root() const + { + return root.get(); + } + + bool Empty() const + { + return root->charMap.empty(); + } +};