mirror of
https://github.com/Artikash/Textractor.git
synced 2025-01-11 01:59:14 +08:00
implement replacer wildcard and optimize memory usage when loading files
This commit is contained in:
parent
b0eeca5b36
commit
b61272a5e6
@ -2,7 +2,6 @@
|
|||||||
#include "defs.h"
|
#include "defs.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "../texthook/texthook.h"
|
#include "../texthook/texthook.h"
|
||||||
#include <filesystem>
|
|
||||||
|
|
||||||
extern const wchar_t* ALREADY_INJECTED;
|
extern const wchar_t* ALREADY_INJECTED;
|
||||||
extern const wchar_t* NEED_32_BIT;
|
extern const wchar_t* NEED_32_BIT;
|
||||||
|
55
extensions/blockmarkup.h
Normal file
55
extensions/blockmarkup.h
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include <istream>
|
||||||
|
|
||||||
|
/// Streaming parser for "block markup": a byte stream containing blocks of the form
///   delimiters[0] field0 delimiters[1] field1 ... |END|
/// Everything outside a block is ignored. The stream is consumed one character of
/// type C (sizeof(C) raw bytes) at a time, so the whole input never has to be in memory.
///
/// @tparam C                character type of the markup (e.g. char, wchar_t)
/// @tparam DelimiterCount   number of opening delimiters, which is also the number of fields per block
/// @tparam bufferStartSize  initial capacity reserved for each field
///
/// Delimiter matching is only attempted right after a '|' character has been read,
/// so every delimiter is expected to end with '|'.
template <typename C, int DelimiterCount, int bufferStartSize = 200>
class BlockMarkupIterator
{
public:
	/// @param it          byte iterator over the input; an std::istream or streambuf pointer converts implicitly
	/// @param delimiters  the DelimiterCount delimiters, in the order they appear in a block.
	///                    Only the views are copied: the characters they refer to must outlive this object.
	BlockMarkupIterator(std::istreambuf_iterator<char> it, const std::basic_string_view<C> (&delimiters)[DelimiterCount]) :
		it(it)
	{
		std::copy_n(delimiters, DelimiterCount, this->delimiters.begin());
	}

	/// Reads the next complete block from the stream.
	/// @return the block's fields with all delimiters stripped, or an empty optional
	///         if the input ends before another complete block is found.
	std::optional<std::array<std::basic_string<C>, DelimiterCount>> Next()
	{
		std::array<std::basic_string<C>, DelimiterCount> results;
		// Text before the opening delimiter is ignored, so it is skipped without being stored.
		if (!Skip(delimiters[0])) return {};
		for (int i = 0; i < DelimiterCount; ++i)
		{
			// Each field runs up to the next delimiter; the last field runs up to |END|.
			const auto delimiter = i + 1 < DelimiterCount ? delimiters[i + 1] : end;
			std::basic_string<C> field;
			field.reserve(bufferStartSize);
			if (!Find(field, delimiter)) return {};
			field.erase(field.size() - delimiter.size());
			results[i] = std::move(field);
		}
		return results;
	}

private:
	/// Tests whether text ends with delimiter. Only called once the last character is known to be '|'.
	static bool EndsWith(const std::basic_string<C>& text, std::basic_string_view<C> delimiter)
	{
		return text.size() >= delimiter.size() &&
			text.compare(text.size() - delimiter.size(), delimiter.size(), delimiter) == 0;
	}

	/// Appends characters from the stream to result until result ends with delimiter.
	/// @return false if the stream ends first.
	bool Find(std::basic_string<C>& result, std::basic_string_view<C> delimiter)
	{
		C ch{};
		while (Read(ch))
		{
			result += ch;
			if (ch == '|' && EndsWith(result, delimiter)) return true;
		}
		return false;
	}

	/// Consumes the stream up to and including delimiter, remembering only the last
	/// delimiter.size() characters so that memory use does not grow with the skipped text.
	/// @return false if the stream ends first.
	bool Skip(std::basic_string_view<C> delimiter)
	{
		std::basic_string<C> window;
		window.reserve(delimiter.size() + 1);
		C ch{};
		while (Read(ch))
		{
			window += ch;
			if (window.size() > delimiter.size()) window.erase(0, 1);
			if (ch == '|' && window == delimiter) return true;
		}
		return false;
	}

	/// Assembles one character from the next sizeof(C) bytes of the stream.
	/// @param out  receives the character; left untouched if the stream ends first
	/// @return false if the stream ends before a whole character could be read.
	bool Read(C& out)
	{
		C value{};
		// Writing through a char* is the well-defined way to fill an object's bytes;
		// casting a byte array to C& would break the aliasing and alignment rules.
		char* bytes = reinterpret_cast<char*>(&value);
		for (std::size_t i = 0; i < sizeof(C); ++i, ++it)
		{
			if (it.equal({})) return false;
			bytes[i] = *it;
		}
		out = value;
		return true;
	}

	// Terminator that closes every block, spelled out per character so it works for any C.
	static constexpr C endImpl[5] = { '|', 'E', 'N', 'D', '|' };
	static constexpr std::basic_string_view<C> end{ endImpl, 5 };

	std::istreambuf_iterator<char> it;
	std::array<std::basic_string_view<C>, DelimiterCount> delimiters;
};
|
@ -2,8 +2,8 @@
|
|||||||
#include "extension.h"
|
#include "extension.h"
|
||||||
#include "ui_extrawindow.h"
|
#include "ui_extrawindow.h"
|
||||||
#include "defs.h"
|
#include "defs.h"
|
||||||
|
#include "blockmarkup.h"
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <filesystem>
|
|
||||||
#include <process.h>
|
#include <process.h>
|
||||||
#include <QColorDialog>
|
#include <QColorDialog>
|
||||||
#include <QFontDialog>
|
#include <QFontDialog>
|
||||||
@ -281,32 +281,31 @@ private:
|
|||||||
catch (std::filesystem::filesystem_error) { return; }
|
catch (std::filesystem::filesystem_error) { return; }
|
||||||
|
|
||||||
dictionary.clear();
|
dictionary.clear();
|
||||||
owningStorage.clear();
|
charStorage.clear();
|
||||||
|
|
||||||
auto StoreCopy = [&](const std::string& string)
|
auto StoreCopy = [&](std::string_view string)
|
||||||
{
|
{
|
||||||
return &*owningStorage.insert(owningStorage.end(), string.c_str(), string.c_str() + string.size() + 1);
|
auto location = &*charStorage.insert(charStorage.end(), string.begin(), string.end());
|
||||||
|
charStorage.push_back(0);
|
||||||
|
return location;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string savedDictionary(std::istreambuf_iterator(std::ifstream(DICTIONARY_SAVE_FILE)), {});
|
charStorage.reserve(std::filesystem::file_size(DICTIONARY_SAVE_FILE));
|
||||||
owningStorage.reserve(savedDictionary.size());
|
std::ifstream stream(DICTIONARY_SAVE_FILE);
|
||||||
for (size_t end = 0; ;)
|
BlockMarkupIterator savedDictionary(stream, Array<std::string_view>{ "|TERM|", "|DEFINITION|" });
|
||||||
|
while (auto read = savedDictionary.Next())
|
||||||
{
|
{
|
||||||
size_t term = savedDictionary.find("|TERM|", end);
|
const auto& [terms, definition] = *read;
|
||||||
size_t definition = savedDictionary.find("|DEFINITION|", term);
|
auto storedDefinition = StoreCopy(definition);
|
||||||
if ((end = savedDictionary.find("|END|", definition)) == std::string::npos) break;
|
std::string_view termsView = terms;
|
||||||
auto storedDefinition = StoreCopy(savedDictionary.substr(definition + 12, end - definition - 12));
|
size_t start = 0, end = termsView.find("|TERM|");
|
||||||
for (size_t next; (next = savedDictionary.find("|TERM|", term + 1)) != std::string::npos && next < definition; term = next)
|
while (end != std::string::npos)
|
||||||
dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, next - term - 6)), storedDefinition });
|
{
|
||||||
dictionary.push_back({ StoreCopy(savedDictionary.substr(term + 6, definition - term - 6)), storedDefinition });
|
dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start, end - start)), storedDefinition });
|
||||||
|
start = end + 6;
|
||||||
|
end = termsView.find("|TERM|", start);
|
||||||
}
|
}
|
||||||
auto oldData = owningStorage.data();
|
dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start)), storedDefinition });
|
||||||
owningStorage.shrink_to_fit();
|
|
||||||
dictionary.shrink_to_fit();
|
|
||||||
for (auto& [term, definition] : dictionary)
|
|
||||||
{
|
|
||||||
term += owningStorage.data() - oldData;
|
|
||||||
definition += owningStorage.data() - oldData;
|
|
||||||
}
|
}
|
||||||
std::sort(dictionary.begin(), dictionary.end());
|
std::sort(dictionary.begin(), dictionary.end());
|
||||||
}
|
}
|
||||||
@ -354,7 +353,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::filesystem::file_time_type dictionaryFileLastWrite;
|
std::filesystem::file_time_type dictionaryFileLastWrite;
|
||||||
std::vector<char> owningStorage;
|
std::vector<char> charStorage;
|
||||||
std::vector<QString> definitions;
|
std::vector<QString> definitions;
|
||||||
int definitionIndex;
|
int definitionIndex;
|
||||||
} dictionaryWindow;
|
} dictionaryWindow;
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
#include "extension.h"
|
#include "extension.h"
|
||||||
|
#include "blockmarkup.h"
|
||||||
#include <cwctype>
|
#include <cwctype>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <filesystem>
|
#include <sstream>
|
||||||
#include <process.h>
|
#include <process.h>
|
||||||
|
|
||||||
extern const wchar_t* REPLACER_INSTRUCTIONS;
|
extern const wchar_t* REPLACER_INSTRUCTIONS;
|
||||||
@ -14,14 +15,16 @@ std::shared_mutex m;
|
|||||||
class Trie
|
class Trie
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Trie(std::unordered_map<std::wstring, std::wstring> replacements)
|
Trie(const std::istream& replacementScript)
|
||||||
{
|
{
|
||||||
for (const auto& [original, replacement] : replacements)
|
BlockMarkupIterator replacementScriptParser(replacementScript.rdbuf(), Array<std::wstring_view>{ L"|ORIG|", L"|BECOMES|" });
|
||||||
|
while (auto read = replacementScriptParser.Next())
|
||||||
{
|
{
|
||||||
|
const auto& [original, replacement] = *read;
|
||||||
Node* current = &root;
|
Node* current = &root;
|
||||||
for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch);
|
for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch);
|
||||||
if (current != &root)
|
if (current != &root)
|
||||||
current->value = owningStorage.insert(owningStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - owningStorage.begin();
|
current->value = charStorage.insert(charStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - charStorage.begin();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -38,10 +41,10 @@ public:
|
|||||||
{
|
{
|
||||||
if (current->value >= 0)
|
if (current->value >= 0)
|
||||||
{
|
{
|
||||||
replacement = owningStorage.data() + current->value;
|
replacement = charStorage.data() + current->value;
|
||||||
originalLength = j - i;
|
originalLength = j - i;
|
||||||
}
|
}
|
||||||
if (!Ignore(sentence[j])) current = Next(current, sentence[j]);
|
if (!Ignore(sentence[j])) current = Next(current, sentence[j]) ? Next(current, sentence[j]) : Next(current, L'^');
|
||||||
}
|
}
|
||||||
|
|
||||||
result += replacement;
|
result += replacement;
|
||||||
@ -76,30 +79,16 @@ private:
|
|||||||
ptrdiff_t value = -1;
|
ptrdiff_t value = -1;
|
||||||
} root;
|
} root;
|
||||||
|
|
||||||
std::vector<wchar_t> owningStorage;
|
std::vector<wchar_t> charStorage;
|
||||||
} trie = { {} };
|
} trie = { std::istringstream("") };
|
||||||
|
|
||||||
std::unordered_map<std::wstring, std::wstring> Parse(std::wstring_view replacementScript)
|
|
||||||
{
|
|
||||||
std::unordered_map<std::wstring, std::wstring> replacements;
|
|
||||||
for (size_t end = 0; ;)
|
|
||||||
{
|
|
||||||
size_t original = replacementScript.find(L"|ORIG|", end);
|
|
||||||
size_t becomes = replacementScript.find(L"|BECOMES|", original);
|
|
||||||
if ((end = replacementScript.find(L"|END|", becomes)) == std::wstring::npos) break;
|
|
||||||
replacements[std::wstring(replacementScript.substr(original + 6, becomes - original - 6))] = replacementScript.substr(becomes + 9, end - becomes - 9);
|
|
||||||
}
|
|
||||||
return replacements;
|
|
||||||
}
|
|
||||||
|
|
||||||
void UpdateReplacements()
|
void UpdateReplacements()
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if (replaceFileLastWrite.exchange(std::filesystem::last_write_time(REPLACE_SAVE_FILE)) == std::filesystem::last_write_time(REPLACE_SAVE_FILE)) return;
|
if (replaceFileLastWrite.exchange(std::filesystem::last_write_time(REPLACE_SAVE_FILE)) == std::filesystem::last_write_time(REPLACE_SAVE_FILE)) return;
|
||||||
std::vector<BYTE> file(std::istreambuf_iterator(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary)), {});
|
|
||||||
std::scoped_lock l(m);
|
std::scoped_lock l(m);
|
||||||
trie = Trie(Parse({ (wchar_t*)file.data(), file.size() / sizeof(wchar_t) }));
|
trie = Trie(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary));
|
||||||
}
|
}
|
||||||
catch (std::filesystem::filesystem_error) { replaceFileLastWrite.store({}); }
|
catch (std::filesystem::filesystem_error) { replaceFileLastWrite.store({}); }
|
||||||
}
|
}
|
||||||
@ -138,12 +127,12 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo)
|
|||||||
|
|
||||||
TEST(
|
TEST(
|
||||||
{
|
{
|
||||||
auto replacements = Parse(LR"(
|
std::wstring replacementScript = LR"(
|
||||||
|ORIG|さよなら|BECOMES|goodbye |END|Ignore this text
|
|ORIG|さよなら|BECOMES|goodbye |END|Ignore this text
|
||||||
And this text ツ
|
And this text ツ
|
||||||
|ORIG|バカ|BECOMES|idiot|END|
|
|ORIG|バカ|BECOMES|idiot|END|
|
||||||
|ORIG|こんにちは |BECOMES| hello|END||ORIG|delete this|BECOMES||END|)");
|
|ORIG|こんにちは |BECOMES| hello|END||ORIG|delet^this|BECOMES||END|)";
|
||||||
assert(replacements.size() == 4);
|
Trie replacements(std::istringstream(std::string{ (const char*)replacementScript.c_str(), replacementScript.size() * sizeof(wchar_t) }));
|
||||||
std::wstring original = LR"(Don't replace this
|
std::wstring original = LR"(Don't replace this
|
||||||
さよなら バカ こんにちは delete this)";
|
さよなら バカ こんにちは delete this)";
|
||||||
std::wstring replaced = Trie(std::move(replacements)).Replace(original);
|
std::wstring replaced = Trie(std::move(replacements)).Replace(original);
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <shared_mutex>
|
#include <shared_mutex>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
#include <filesystem>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
@ -33,7 +34,7 @@ struct ArrayImpl<T> { using type = T[]; };
|
|||||||
template <typename... Ts>
|
template <typename... Ts>
|
||||||
using Array = typename ArrayImpl<Ts...>::type;
|
using Array = typename ArrayImpl<Ts...>::type;
|
||||||
|
|
||||||
template <auto F> using Functor = std::integral_constant<std::decay_t<decltype(F)>, F>;
|
template <auto F> using Functor = std::integral_constant<std::remove_reference_t<decltype(F)>, F>;
|
||||||
|
|
||||||
template <typename V>
|
template <typename V>
|
||||||
struct Identity { V operator()(V v) const { return v; } };
|
struct Identity { V operator()(V v) const { return v; } };
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "defs.h"
|
#include "defs.h"
|
||||||
#include "resource.h"
|
#include "resource.h"
|
||||||
#include <filesystem>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <QApplication>
|
#include <QApplication>
|
||||||
|
3
text.cpp
3
text.cpp
@ -110,7 +110,7 @@ const char* OUT_OF_RECORDS_RETRY = u8"Textractor: out of search records, please
|
|||||||
const char* FUNC_MISSING = u8"Textractor: function not present";
|
const char* FUNC_MISSING = u8"Textractor: function not present";
|
||||||
const char* MODULE_MISSING = u8"Textractor: module not present";
|
const char* MODULE_MISSING = u8"Textractor: module not present";
|
||||||
const char* GARBAGE_MEMORY = u8"Textractor: memory constantly changing, useless to read";
|
const char* GARBAGE_MEMORY = u8"Textractor: memory constantly changing, useless to read";
|
||||||
const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an incorrect H-code)";
|
const char* SEND_ERROR = u8"Textractor: Send ERROR (likely an unstable/incorrect H-code)";
|
||||||
const char* READ_ERROR = u8"Textractor: Reader ERROR (likely an incorrect R-code)";
|
const char* READ_ERROR = u8"Textractor: Reader ERROR (likely an incorrect R-code)";
|
||||||
const char* HIJACK_ERROR = u8"Textractor: Hijack ERROR";
|
const char* HIJACK_ERROR = u8"Textractor: Hijack ERROR";
|
||||||
const char* COULD_NOT_FIND = u8"Textractor: could not find text";
|
const char* COULD_NOT_FIND = u8"Textractor: could not find text";
|
||||||
@ -174,6 +174,7 @@ const wchar_t* REPLACER_INSTRUCTIONS = LR"(This file only does anything when the
|
|||||||
Replacement commands must be formatted like this:
|
Replacement commands must be formatted like this:
|
||||||
|ORIG|original_text|BECOMES|replacement_text|END|
|
|ORIG|original_text|BECOMES|replacement_text|END|
|
||||||
All text in this file outside of a replacement command is ignored.
|
All text in this file outside of a replacement command is ignored.
|
||||||
|
A caret (^) acts as a wildcard that matches any other single character.
|
||||||
Whitespace in original_text is ignored, but replacement_text can contain spaces, newlines, etc.
|
Whitespace in original_text is ignored, but replacement_text can contain spaces, newlines, etc.
|
||||||
This file must be encoded in Unicode (UTF-16 Little Endian).)";
|
This file must be encoded in Unicode (UTF-16 Little Endian).)";
|
||||||
const char* THREAD_LINKER = u8"Thread Linker";
|
const char* THREAD_LINKER = u8"Thread Linker";
|
||||||
|
Loading…
x
Reference in New Issue
Block a user