implement replacer wildcard and optimize memory usage when loading files

This commit is contained in:
Akash Mozumdar 2020-02-16 17:58:09 -07:00
parent b0eeca5b36
commit b61272a5e6
7 changed files with 96 additions and 53 deletions

View File

@ -2,7 +2,6 @@
#include "defs.h"
#include "util.h"
#include "../texthook/texthook.h"
#include <filesystem>
extern const wchar_t* ALREADY_INJECTED;
extern const wchar_t* NEED_32_BIT;

55
extensions/blockmarkup.h Normal file
View File

@ -0,0 +1,55 @@
#pragma once
#include "common.h"
#include <istream>
// Incrementally parses "block markup" out of a byte stream, one block per call to Next(),
// so the whole stream never has to be held in memory at once.
// A block has the form:
//   delimiters[0] field0 delimiters[1] field1 ... delimiters[DelimiterCount - 1] fieldLast |END|
// Everything in the stream that precedes delimiters[0] is skipped.
// C: character type; every sizeof(C) consecutive bytes of the stream are reassembled into one C.
// DelimiterCount: number of delimiters, which is also the number of fields returned per block.
// bufferStartSize: capacity reserved up front for each field's buffer.
// Only views of the delimiters are stored, so the characters they refer to must outlive this object.
// An empty delimiter never matches.
template <typename C, int DelimiterCount, int bufferStartSize = 200>
class BlockMarkupIterator
{
public:
	// it: position in the byte stream to start parsing from (implicitly constructible from a stream or streambuf).
	// delimiters: the opening delimiter of each field, in the order they appear in a block.
	BlockMarkupIterator(std::istreambuf_iterator<char> it, const std::basic_string_view<C> (&delimiters)[DelimiterCount]) :
		it(it)
	{
		std::copy_n(delimiters, DelimiterCount, this->delimiters.begin());
	}

	// Reads the next complete block and returns its fields in delimiter order.
	// Returns an empty optional once the stream ends before another complete block is read.
	std::optional<std::array<std::basic_string<C>, DelimiterCount>> Next()
	{
		std::array<std::basic_string<C>, DelimiterCount> results;
		std::basic_string<C> buffer;
		buffer.reserve(bufferStartSize);
		// Skip everything up to and including the opening delimiter; if it is missing
		// the stream is exhausted and the first Find below fails.
		Find(buffer, delimiters[0]);
		buffer.clear();
		for (int i = 0; i < DelimiterCount; ++i)
		{
			// Field i is terminated by the next field's delimiter, or by |END| for the last field.
			const auto delimiter = i + 1 < DelimiterCount ? delimiters[i + 1] : end;
			if (!Find(buffer, delimiter)) return {};
			// Find leaves the terminating delimiter at the end of the buffer: strip it.
			buffer.erase(buffer.size() - delimiter.size());
			results[i] = std::move(buffer);
			(buffer = {}).reserve(bufferStartSize);
		}
		return results;
	}

private:
	// True when text ends with suffix. The last-character comparison comes first because it is
	// a cheap way to reject almost every position before doing the full comparison.
	static bool EndsWith(const std::basic_string<C>& text, std::basic_string_view<C> suffix)
	{
		return !suffix.empty()
			&& text.size() >= suffix.size()
			&& text.back() == suffix.back()
			&& text.compare(text.size() - suffix.size(), suffix.size(), suffix) == 0;
	}

	// Appends characters from the stream to result until result ends with delimiter.
	// Returns false if the stream ends first.
	bool Find(std::basic_string<C>& result, std::basic_string_view<C> delimiter)
	{
		C next{};
		while (Read(next))
		{
			result += next;
			if (EndsWith(result, delimiter)) return true;
		}
		return false;
	}

	// Reassembles the next sizeof(C) bytes of the stream into out.
	// Returns false (leaving out untouched) if the stream ends before a full character is read.
	bool Read(C& out)
	{
		char bytes[sizeof(C)];
		for (size_t i = 0; i < sizeof(C); ++i, ++it)
		{
			if (it.equal({})) return false;
			bytes[i] = *it;
		}
		// Copy into the object representation through a char pointer: unlike casting the byte
		// array to C&, this does not violate the aliasing rules.
		std::copy_n(bytes, sizeof(C), reinterpret_cast<char*>(&out));
		return true;
	}

	static constexpr C endImpl[5] = { '|', 'E', 'N', 'D', '|' };
	static constexpr std::basic_string_view<C> end{ endImpl, 5 };

	std::istreambuf_iterator<char> it;
	std::array<std::basic_string_view<C>, DelimiterCount> delimiters;
};

View File

@ -2,8 +2,8 @@
#include "extension.h"
#include "ui_extrawindow.h"
#include "defs.h"
#include "blockmarkup.h"
#include <fstream>
#include <filesystem>
#include <process.h>
#include <QColorDialog>
#include <QFontDialog>
@ -281,32 +281,31 @@ private:
catch (std::filesystem::filesystem_error) { return; }
dictionary.clear();
owningStorage.clear();
charStorage.clear();
auto StoreCopy = [&](const std::string& string)
auto StoreCopy = [&](std::string_view string)
{
return &*owningStorage.insert(owningStorage.end(), string.c_str(), string.c_str() + string.size() + 1);
auto location = &*charStorage.insert(charStorage.end(), string.begin(), string.end());
charStorage.push_back(0);
return location;
};
std::string savedDictionary(std::istreambuf_iterator(std::ifstream(DICTIONARY_SAVE_FILE)), {});
owningStorage.reserve(savedDictionary.size());
for (size_t end = 0; ;)
charStorage.reserve(std::filesystem::file_size(DICTIONARY_SAVE_FILE));
std::ifstream stream(DICTIONARY_SAVE_FILE);
BlockMarkupIterator savedDictionary(stream, Array<std::string_view>{ "|TERM|", "|DEFINITION|" });
while (auto read = savedDictionary.Next())
{
size_t term = savedDictionary.find("|TERM|", end);
size_t definition = savedDictionary.find("|DEFINITION|", term);
if ((end = savedDictionary.find("|END|", definition)) == std::string::npos) break;
auto storedDefinition = StoreCopy(savedDictionary.substr(definition + 12, end - definition - 12));
for (size_t next; (next = savedDictionary.find("|TERM|", term + 1)) != std::string::npos && next < definition; term = next)
dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, next - term - 6)), storedDefinition });
dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, definition - term - 6)), storedDefinition });
}
auto oldData = owningStorage.data();
owningStorage.shrink_to_fit();
dictionary.shrink_to_fit();
for (auto& [term, definition] : dictionary)
{
term += owningStorage.data() - oldData;
definition += owningStorage.data() - oldData;
const auto& [terms, definition] = *read;
auto storedDefinition = StoreCopy(definition);
std::string_view termsView = terms;
size_t start = 0, end = termsView.find("|TERM|");
while (end != std::string::npos)
{
dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start, end - start)), storedDefinition });
start = end + 6;
end = termsView.find("|TERM|", start);
}
dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start)), storedDefinition });
}
std::sort(dictionary.begin(), dictionary.end());
}
@ -354,7 +353,7 @@ private:
}
std::filesystem::file_time_type dictionaryFileLastWrite;
std::vector<char> owningStorage;
std::vector<char> charStorage;
std::vector<QString> definitions;
int definitionIndex;
} dictionaryWindow;

View File

@ -1,7 +1,8 @@
#include "extension.h"
#include "blockmarkup.h"
#include <cwctype>
#include <fstream>
#include <filesystem>
#include <sstream>
#include <process.h>
extern const wchar_t* REPLACER_INSTRUCTIONS;
@ -14,14 +15,16 @@ std::shared_mutex m;
class Trie
{
public:
Trie(std::unordered_map<std::wstring, std::wstring> replacements)
Trie(const std::istream& replacementScript)
{
for (const auto& [original, replacement] : replacements)
BlockMarkupIterator replacementScriptParser(replacementScript.rdbuf(), Array<std::wstring_view>{ L"|ORIG|", L"|BECOMES|" });
while (auto read = replacementScriptParser.Next())
{
const auto& [original, replacement] = *read;
Node* current = &root;
for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch);
if (current != &root)
current->value = owningStorage.insert(owningStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - owningStorage.begin();
current->value = charStorage.insert(charStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - charStorage.begin();
}
}
@ -38,10 +41,10 @@ public:
{
if (current->value >= 0)
{
replacement = owningStorage.data() + current->value;
replacement = charStorage.data() + current->value;
originalLength = j - i;
}
if (!Ignore(sentence[j])) current = Next(current, sentence[j]);
if (!Ignore(sentence[j])) current = Next(current, sentence[j]) ? Next(current, sentence[j]) : Next(current, L'^');
}
result += replacement;
@ -76,30 +79,16 @@ private:
ptrdiff_t value = -1;
} root;
std::vector<wchar_t> owningStorage;
} trie = { {} };
std::unordered_map<std::wstring, std::wstring> Parse(std::wstring_view replacementScript)
{
std::unordered_map<std::wstring, std::wstring> replacements;
for (size_t end = 0; ;)
{
size_t original = replacementScript.find(L"|ORIG|", end);
size_t becomes = replacementScript.find(L"|BECOMES|", original);
if ((end = replacementScript.find(L"|END|", becomes)) == std::wstring::npos) break;
replacements[std::wstring(replacementScript.substr(original + 6, becomes - original - 6))] = replacementScript.substr(becomes + 9, end - becomes - 9);
}
return replacements;
}
std::vector<wchar_t> charStorage;
} trie = { std::istringstream("") };
void UpdateReplacements()
{
try
{
if (replaceFileLastWrite.exchange(std::filesystem::last_write_time(REPLACE_SAVE_FILE)) == std::filesystem::last_write_time(REPLACE_SAVE_FILE)) return;
std::vector<BYTE> file(std::istreambuf_iterator(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary)), {});
std::scoped_lock l(m);
trie = Trie(Parse({ (wchar_t*)file.data(), file.size() / sizeof(wchar_t) }));
trie = Trie(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary));
}
catch (std::filesystem::filesystem_error) { replaceFileLastWrite.store({}); }
}
@ -138,12 +127,12 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo)
TEST(
{
auto replacements = Parse(LR"(
std::wstring replacementScript = LR"(
|ORIG||BECOMES|goodbye |END|Ignore this text
And this text   
|ORIG||BECOMES|idiot|END|
|ORIG| |BECOMES| hello|END||ORIG|delete this|BECOMES||END|)");
assert(replacements.size() == 4);
|ORIG| |BECOMES| hello|END||ORIG|delet^this|BECOMES||END|)";
Trie replacements(std::istringstream(std::string{ (const char*)replacementScript.c_str(), replacementScript.size() * sizeof(wchar_t) }));
std::wstring original = LR"(Don't replace this 
delete this)";
std::wstring replaced = Trie(std::move(replacements)).Replace(original);

View File

@ -17,6 +17,7 @@
#include <mutex>
#include <shared_mutex>
#include <atomic>
#include <filesystem>
#include <cstdint>
#include <cassert>
@ -33,7 +34,7 @@ struct ArrayImpl<T> { using type = T[]; };
template <typename... Ts>
using Array = typename ArrayImpl<Ts...>::type;
template <auto F> using Functor = std::integral_constant<std::decay_t<decltype(F)>, F>;
template <auto F> using Functor = std::integral_constant<std::remove_reference_t<decltype(F)>, F>;
template <typename V>
struct Identity { V operator()(V v) const { return v; } };

View File

@ -1,7 +1,6 @@
#include "common.h"
#include "defs.h"
#include "resource.h"
#include <filesystem>
#include <fstream>
#include <sstream>
#include <QApplication>

View File

@ -110,7 +110,7 @@ const char* OUT_OF_RECORDS_RETRY = u8"Textractor: out of search records, please
const char* FUNC_MISSING = u8"Textractor: function not present";
const char* MODULE_MISSING = u8"Textractor: module not present";
const char* GARBAGE_MEMORY = u8"Textractor: memory constantly changing, useless to read";
const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an incorrect H-code)";
const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an unstable/incorrect H-code)";
const char* READ_ERROR = u8"Textractor: Reader ERROR (likely an incorrect R-code)";
const char* HIJACK_ERROR = u8"Textractor: Hijack ERROR";
const char* COULD_NOT_FIND = u8"Textractor: could not find text";
@ -174,6 +174,7 @@ const wchar_t* REPLACER_INSTRUCTIONS = LR"(This file only does anything when the
Replacement commands must be formatted like this:
|ORIG|original_text|BECOMES|replacement_text|END|
All text in this file outside of a replacement command is ignored.
A caret (^) acts as a wildcard that matches any other single character.
Whitespace in original_text is ignored, but replacement_text can contain spaces, newlines, etc.
This file must be encoded in Unicode (UTF-16 Little Endian).)";
const char* THREAD_LINKER = u8"Thread Linker";