implement replacer wildcard and optimize memory usage when loading files

This commit is contained in:
Akash Mozumdar 2020-02-16 17:58:09 -07:00
parent b0eeca5b36
commit b61272a5e6
7 changed files with 96 additions and 53 deletions

View File

@ -2,7 +2,6 @@
#include "defs.h" #include "defs.h"
#include "util.h" #include "util.h"
#include "../texthook/texthook.h" #include "../texthook/texthook.h"
#include <filesystem>
extern const wchar_t* ALREADY_INJECTED; extern const wchar_t* ALREADY_INJECTED;
extern const wchar_t* NEED_32_BIT; extern const wchar_t* NEED_32_BIT;

55
extensions/blockmarkup.h Normal file
View File

@ -0,0 +1,55 @@
#pragma once
#include "common.h"
#include <cstring>
#include <istream>
/// Streaming parser for "block markup" of the form
///     <delimiters[0]>field0<delimiters[1]>field1 ... <delimiters[N-1]>fieldN-1|END|
/// Bytes are pulled one at a time from a char stream buffer and assembled into
/// characters of type C (sizeof(C) bytes each, in the byte order found in the stream),
/// so the whole input never has to be held in memory at once.
///
/// Template parameters:
///   C               - character type of the parsed text (e.g. char, wchar_t)
///   DelimiterCount  - number of delimiters, and therefore of fields per block
///   bufferStartSize - initial capacity reserved for each field buffer
///
/// Every delimiter must end with '|': a match is only tested when the character
/// just read is '|'.
template <typename C, int DelimiterCount, int bufferStartSize = 200>
class BlockMarkupIterator
{
public:
    /// @param it          position in the byte stream to start reading from
    /// @param delimiters  the delimiters that introduce each field, in order; the views are
    ///                    copied, so the character data they refer to must outlive this object
    BlockMarkupIterator(std::istreambuf_iterator<char> it, const std::basic_string_view<C> (&delimiters)[DelimiterCount]) :
        it(it)
    {
        std::copy_n(delimiters, DelimiterCount, this->delimiters.begin());
    }

    /// Reads the next block from the stream.
    /// @return the fields of the block (delimiters stripped), or an empty optional if the
    ///         stream ended before a complete block could be read
    std::optional<std::array<std::basic_string<C>, DelimiterCount>> Next()
    {
        std::array<std::basic_string<C>, DelimiterCount> results;
        // Text before the opening delimiter is not part of any block. The result is deliberately
        // ignored: if the stream ran out here, the first Find below fails and reports it.
        Skip(delimiters[0]);
        for (int i = 0; i < DelimiterCount; ++i)
        {
            // Field i runs until the next delimiter, or until |END| for the last field.
            const auto delimiter = i + 1 < DelimiterCount ? delimiters[i + 1] : end;
            std::basic_string<C> buffer;
            buffer.reserve(bufferStartSize);
            if (!Find(buffer, delimiter)) return {};
            buffer.erase(buffer.size() - delimiter.size()); // strip the terminating delimiter
            results[i] = std::move(buffer);
        }
        return results;
    }

private:
    /// True if text (non-empty) ends with '|' and its tail equals delimiter.
    static bool EndsWith(const std::basic_string<C>& text, std::basic_string_view<C> delimiter)
    {
        if (text.back() != '|' || text.size() < delimiter.size()) return false;
        return std::basic_string_view<C>(text).substr(text.size() - delimiter.size()) == delimiter;
    }

    /// Consumes the stream up to and including delimiter without storing the skipped text:
    /// only a bounded window of the most recent characters is kept.
    /// @return false if the stream ended before delimiter was seen
    bool Skip(std::basic_string_view<C> delimiter)
    {
        // A match needs at most the previous delimiter.size() - 1 characters plus the new one.
        const std::size_t keep = delimiter.empty() ? 0 : delimiter.size() - 1;
        const std::size_t limit = keep + static_cast<std::size_t>(bufferStartSize > 0 ? bufferStartSize : 1);
        std::basic_string<C> window;
        window.reserve(limit);
        for (C ch{}; Read(ch);)
        {
            window += ch;
            if (EndsWith(window, delimiter)) return true;
            // Trim in bulk so the erase cost is amortized over many characters.
            if (window.size() >= limit) window.erase(0, window.size() - keep);
        }
        return false;
    }

    /// Appends characters from the stream to result until result ends with delimiter.
    /// @return false if the stream ended before delimiter was seen
    bool Find(std::basic_string<C>& result, std::basic_string_view<C> delimiter)
    {
        for (C ch{}; Read(ch);)
        {
            result += ch;
            if (EndsWith(result, delimiter)) return true;
        }
        return false;
    }

    /// Reads sizeof(C) bytes from the stream and assembles them into one character.
    /// @return false (leaving out untouched) if the stream ended first
    bool Read(C& out)
    {
        char bytes[sizeof(C)];
        for (std::size_t i = 0; i < sizeof(C); ++i, ++it)
        {
            if (it.equal({})) return false;
            bytes[i] = *it;
        }
        // memcpy rather than a reinterpret_cast of the byte array: the array is neither
        // guaranteed to be aligned for C nor allowed to be accessed as a C object.
        std::memcpy(&out, bytes, sizeof(C));
        return true;
    }

    static constexpr C endImpl[5] = { '|', 'E', 'N', 'D', '|' };
    static constexpr std::basic_string_view<C> end{ endImpl, 5 };

    std::istreambuf_iterator<char> it;
    std::array<std::basic_string_view<C>, DelimiterCount> delimiters;
};

View File

@ -2,8 +2,8 @@
#include "extension.h" #include "extension.h"
#include "ui_extrawindow.h" #include "ui_extrawindow.h"
#include "defs.h" #include "defs.h"
#include "blockmarkup.h"
#include <fstream> #include <fstream>
#include <filesystem>
#include <process.h> #include <process.h>
#include <QColorDialog> #include <QColorDialog>
#include <QFontDialog> #include <QFontDialog>
@ -281,32 +281,31 @@ private:
catch (std::filesystem::filesystem_error) { return; } catch (std::filesystem::filesystem_error) { return; }
dictionary.clear(); dictionary.clear();
owningStorage.clear(); charStorage.clear();
auto StoreCopy = [&](const std::string& string) auto StoreCopy = [&](std::string_view string)
{ {
return &*owningStorage.insert(owningStorage.end(), string.c_str(), string.c_str() + string.size() + 1); auto location = &*charStorage.insert(charStorage.end(), string.begin(), string.end());
charStorage.push_back(0);
return location;
}; };
std::string savedDictionary(std::istreambuf_iterator(std::ifstream(DICTIONARY_SAVE_FILE)), {}); charStorage.reserve(std::filesystem::file_size(DICTIONARY_SAVE_FILE));
owningStorage.reserve(savedDictionary.size()); std::ifstream stream(DICTIONARY_SAVE_FILE);
for (size_t end = 0; ;) BlockMarkupIterator savedDictionary(stream, Array<std::string_view>{ "|TERM|", "|DEFINITION|" });
while (auto read = savedDictionary.Next())
{ {
size_t term = savedDictionary.find("|TERM|", end); const auto& [terms, definition] = *read;
size_t definition = savedDictionary.find("|DEFINITION|", term); auto storedDefinition = StoreCopy(definition);
if ((end = savedDictionary.find("|END|", definition)) == std::string::npos) break; std::string_view termsView = terms;
auto storedDefinition = StoreCopy(savedDictionary.substr(definition + 12, end - definition - 12)); size_t start = 0, end = termsView.find("|TERM|");
for (size_t next; (next = savedDictionary.find("|TERM|", term + 1)) != std::string::npos && next < definition; term = next) while (end != std::string::npos)
dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, next - term - 6)), storedDefinition }); {
dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, definition - term - 6)), storedDefinition }); dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start, end - start)), storedDefinition });
start = end + 6;
end = termsView.find("|TERM|", start);
} }
auto oldData = owningStorage.data(); dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start)), storedDefinition });
owningStorage.shrink_to_fit();
dictionary.shrink_to_fit();
for (auto& [term, definition] : dictionary)
{
term += owningStorage.data() - oldData;
definition += owningStorage.data() - oldData;
} }
std::sort(dictionary.begin(), dictionary.end()); std::sort(dictionary.begin(), dictionary.end());
} }
@ -354,7 +353,7 @@ private:
} }
std::filesystem::file_time_type dictionaryFileLastWrite; std::filesystem::file_time_type dictionaryFileLastWrite;
std::vector<char> owningStorage; std::vector<char> charStorage;
std::vector<QString> definitions; std::vector<QString> definitions;
int definitionIndex; int definitionIndex;
} dictionaryWindow; } dictionaryWindow;

View File

@ -1,7 +1,8 @@
#include "extension.h" #include "extension.h"
#include "blockmarkup.h"
#include <cwctype> #include <cwctype>
#include <fstream> #include <fstream>
#include <filesystem> #include <sstream>
#include <process.h> #include <process.h>
extern const wchar_t* REPLACER_INSTRUCTIONS; extern const wchar_t* REPLACER_INSTRUCTIONS;
@ -14,14 +15,16 @@ std::shared_mutex m;
class Trie class Trie
{ {
public: public:
Trie(std::unordered_map<std::wstring, std::wstring> replacements) Trie(const std::istream& replacementScript)
{ {
for (const auto& [original, replacement] : replacements) BlockMarkupIterator replacementScriptParser(replacementScript.rdbuf(), Array<std::wstring_view>{ L"|ORIG|", L"|BECOMES|" });
while (auto read = replacementScriptParser.Next())
{ {
const auto& [original, replacement] = *read;
Node* current = &root; Node* current = &root;
for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch); for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch);
if (current != &root) if (current != &root)
current->value = owningStorage.insert(owningStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - owningStorage.begin(); current->value = charStorage.insert(charStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - charStorage.begin();
} }
} }
@ -38,10 +41,10 @@ public:
{ {
if (current->value >= 0) if (current->value >= 0)
{ {
replacement = owningStorage.data() + current->value; replacement = charStorage.data() + current->value;
originalLength = j - i; originalLength = j - i;
} }
if (!Ignore(sentence[j])) current = Next(current, sentence[j]); if (!Ignore(sentence[j])) current = Next(current, sentence[j]) ? Next(current, sentence[j]) : Next(current, L'^');
} }
result += replacement; result += replacement;
@ -76,30 +79,16 @@ private:
ptrdiff_t value = -1; ptrdiff_t value = -1;
} root; } root;
std::vector<wchar_t> owningStorage; std::vector<wchar_t> charStorage;
} trie = { {} }; } trie = { std::istringstream("") };
std::unordered_map<std::wstring, std::wstring> Parse(std::wstring_view replacementScript)
{
std::unordered_map<std::wstring, std::wstring> replacements;
for (size_t end = 0; ;)
{
size_t original = replacementScript.find(L"|ORIG|", end);
size_t becomes = replacementScript.find(L"|BECOMES|", original);
if ((end = replacementScript.find(L"|END|", becomes)) == std::wstring::npos) break;
replacements[std::wstring(replacementScript.substr(original + 6, becomes - original - 6))] = replacementScript.substr(becomes + 9, end - becomes - 9);
}
return replacements;
}
void UpdateReplacements() void UpdateReplacements()
{ {
try try
{ {
if (replaceFileLastWrite.exchange(std::filesystem::last_write_time(REPLACE_SAVE_FILE)) == std::filesystem::last_write_time(REPLACE_SAVE_FILE)) return; if (replaceFileLastWrite.exchange(std::filesystem::last_write_time(REPLACE_SAVE_FILE)) == std::filesystem::last_write_time(REPLACE_SAVE_FILE)) return;
std::vector<BYTE> file(std::istreambuf_iterator(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary)), {});
std::scoped_lock l(m); std::scoped_lock l(m);
trie = Trie(Parse({ (wchar_t*)file.data(), file.size() / sizeof(wchar_t) })); trie = Trie(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary));
} }
catch (std::filesystem::filesystem_error) { replaceFileLastWrite.store({}); } catch (std::filesystem::filesystem_error) { replaceFileLastWrite.store({}); }
} }
@ -138,12 +127,12 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo)
TEST( TEST(
{ {
auto replacements = Parse(LR"( std::wstring replacementScript = LR"(
|ORIG||BECOMES|goodbye |END|Ignore this text |ORIG||BECOMES|goodbye |END|Ignore this text
And this text    And this text   
|ORIG||BECOMES|idiot|END| |ORIG||BECOMES|idiot|END|
|ORIG| |BECOMES| hello|END||ORIG|delete this|BECOMES||END|)"); |ORIG| |BECOMES| hello|END||ORIG|delet^this|BECOMES||END|)";
assert(replacements.size() == 4); Trie replacements(std::istringstream(std::string{ (const char*)replacementScript.c_str(), replacementScript.size() * sizeof(wchar_t) }));
std::wstring original = LR"(Don't replace this  std::wstring original = LR"(Don't replace this 
delete this)"; delete this)";
std::wstring replaced = Trie(std::move(replacements)).Replace(original); std::wstring replaced = Trie(std::move(replacements)).Replace(original);

View File

@ -17,6 +17,7 @@
#include <mutex> #include <mutex>
#include <shared_mutex> #include <shared_mutex>
#include <atomic> #include <atomic>
#include <filesystem>
#include <cstdint> #include <cstdint>
#include <cassert> #include <cassert>
@ -33,7 +34,7 @@ struct ArrayImpl<T> { using type = T[]; };
template <typename... Ts> template <typename... Ts>
using Array = typename ArrayImpl<Ts...>::type; using Array = typename ArrayImpl<Ts...>::type;
template <auto F> using Functor = std::integral_constant<std::decay_t<decltype(F)>, F>; template <auto F> using Functor = std::integral_constant<std::remove_reference_t<decltype(F)>, F>;
template <typename V> template <typename V>
struct Identity { V operator()(V v) const { return v; } }; struct Identity { V operator()(V v) const { return v; } };

View File

@ -1,7 +1,6 @@
#include "common.h" #include "common.h"
#include "defs.h" #include "defs.h"
#include "resource.h" #include "resource.h"
#include <filesystem>
#include <fstream> #include <fstream>
#include <sstream> #include <sstream>
#include <QApplication> #include <QApplication>

View File

@ -110,7 +110,7 @@ const char* OUT_OF_RECORDS_RETRY = u8"Textractor: out of search records, please
const char* FUNC_MISSING = u8"Textractor: function not present"; const char* FUNC_MISSING = u8"Textractor: function not present";
const char* MODULE_MISSING = u8"Textractor: module not present"; const char* MODULE_MISSING = u8"Textractor: module not present";
const char* GARBAGE_MEMORY = u8"Textractor: memory constantly changing, useless to read"; const char* GARBAGE_MEMORY = u8"Textractor: memory constantly changing, useless to read";
const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an incorrect H-code)"; const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an unstable/incorrect H-code)";
const char* READ_ERROR = u8"Textractor: Reader ERROR (likely an incorrect R-code)"; const char* READ_ERROR = u8"Textractor: Reader ERROR (likely an incorrect R-code)";
const char* HIJACK_ERROR = u8"Textractor: Hijack ERROR"; const char* HIJACK_ERROR = u8"Textractor: Hijack ERROR";
const char* COULD_NOT_FIND = u8"Textractor: could not find text"; const char* COULD_NOT_FIND = u8"Textractor: could not find text";
@ -174,6 +174,7 @@ const wchar_t* REPLACER_INSTRUCTIONS = LR"(This file only does anything when the
Replacement commands must be formatted like this: Replacement commands must be formatted like this:
|ORIG|original_text|BECOMES|replacement_text|END| |ORIG|original_text|BECOMES|replacement_text|END|
All text in this file outside of a replacement command is ignored. All text in this file outside of a replacement command is ignored.
A caret (^) acts as a wildcard that matches any other single character.
Whitespace in original_text is ignored, but replacement_text can contain spaces, newlines, etc. Whitespace in original_text is ignored, but replacement_text can contain spaces, newlines, etc.
This file must be encoded in Unicode (UTF-16 Little Endian).)"; This file must be encoded in Unicode (UTF-16 Little Endian).)";
const char* THREAD_LINKER = u8"Thread Linker"; const char* THREAD_LINKER = u8"Thread Linker";