Textractor_test/extensions/replacer.cpp

143 lines
4.0 KiB
C++
Raw Normal View History

2019-01-24 05:11:14 +08:00
#include "extension.h"
#include "blockmarkup.h"
2019-01-24 05:11:14 +08:00
#include <cwctype>
#include <fstream>
#include <sstream>
2019-01-24 21:32:21 +08:00
#include <process.h>
2019-01-24 05:11:14 +08:00
2019-02-28 00:33:17 +08:00
extern const wchar_t* REPLACER_INSTRUCTIONS;
2019-07-03 20:32:32 +08:00
2019-02-19 12:12:12 +08:00
constexpr auto REPLACE_SAVE_FILE = u8"SavedReplacements.txt";
2019-07-03 20:32:32 +08:00
std::atomic<std::filesystem::file_time_type> replaceFileLastWrite = {};
2021-06-05 16:58:53 +08:00
concurrency::reader_writer_lock m;
2019-01-24 05:11:14 +08:00
2019-07-03 20:32:32 +08:00
class Trie
2019-01-24 05:11:14 +08:00
{
public:
Trie(const std::istream& replacementScript)
2019-01-24 05:11:14 +08:00
{
2020-02-26 15:59:47 +08:00
BlockMarkupIterator replacementScriptParser(replacementScript, Array<std::wstring_view>{ L"|ORIG|", L"|BECOMES|" });
while (auto read = replacementScriptParser.Next())
2019-07-03 20:32:32 +08:00
{
2020-02-26 15:59:47 +08:00
const auto& [original, replacement] = read.value();
2019-07-03 20:32:32 +08:00
Node* current = &root;
for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch);
if (current != &root)
current->value = charStorage.insert(charStorage.end(), replacement.c_str(), replacement.c_str() + replacement.size() + 1) - charStorage.begin();
2019-07-03 20:32:32 +08:00
}
2019-01-24 05:11:14 +08:00
}
2019-07-03 20:32:32 +08:00
std::wstring Replace(const std::wstring& sentence) const
2019-01-24 05:11:14 +08:00
{
2019-07-03 20:32:32 +08:00
std::wstring result;
for (int i = 0; i < sentence.size();)
{
std::wstring_view replacement(sentence.c_str() + i, 1);
2019-07-03 20:32:32 +08:00
int originalLength = 1;
const Node* current = &root;
for (int j = i; current && j <= sentence.size(); ++j)
{
if (current->value >= 0)
2019-07-03 20:32:32 +08:00
{
replacement = charStorage.data() + current->value;
2019-07-03 20:32:32 +08:00
originalLength = j - i;
}
if (!Ignore(sentence[j])) current = Next(current, sentence[j]) ? Next(current, sentence[j]) : Next(current, L'^');
}
2019-07-03 20:32:32 +08:00
result += replacement;
i += originalLength;
}
return result;
2019-01-24 05:11:14 +08:00
}
bool Empty()
{
return root.charMap.empty();
}
2019-01-24 05:11:14 +08:00
private:
2019-02-18 08:14:49 +08:00
static bool Ignore(wchar_t ch)
2019-01-24 05:11:14 +08:00
{
2020-09-10 21:12:50 +08:00
return ch <= 0x20 || iswspace(ch);
2019-01-24 05:11:14 +08:00
}
template <typename Node>
static Node* Next(Node* node, wchar_t ch)
{
auto it = std::lower_bound(node->charMap.begin(), node->charMap.end(), ch, [](const auto& one, auto two) { return one.first < two; });
if (it != node->charMap.end() && it->first == ch) return it->second.get();
if constexpr (!std::is_const_v<Node>) return node->charMap.insert(it, { ch, std::make_unique<Node>() })->second.get();
return nullptr;
}
2019-01-24 05:11:14 +08:00
struct Node
{
std::vector<std::pair<wchar_t, std::unique_ptr<Node>>> charMap;
ptrdiff_t value = -1;
2019-01-24 05:11:14 +08:00
} root;
std::vector<wchar_t> charStorage;
} trie = { std::istringstream("") };
2019-01-24 05:11:14 +08:00
// Rebuilds the global trie from REPLACE_SAVE_FILE when the file's last-write time
// differs from the one recorded in replaceFileLastWrite. If the timestamp cannot be
// read (filesystem_error), the recorded time is reset so a later call retries.
void UpdateReplacements()
{
	try
	{
		// Read the timestamp exactly once. Reading it separately for the exchange and for the
		// comparison could record a newer time than the one compared against, and that edit
		// would then never be loaded.
		const auto lastWrite = std::filesystem::last_write_time(REPLACE_SAVE_FILE);
		if (replaceFileLastWrite.exchange(lastWrite) == lastWrite) return;
		// Parse while holding the exclusive lock so concurrent reloads are installed in the order they read the file
		std::scoped_lock lock(m);
		trie = Trie(std::ifstream(REPLACE_SAVE_FILE, std::ios::binary));
	}
	catch (const std::filesystem::filesystem_error&) { replaceFileLastWrite.store({}); }
}
2019-01-24 05:11:14 +08:00
// DLL entry point. On process attach the replacements are loaded; if none were loaded,
// the save file is overwritten with REPLACER_INSTRUCTIONS and opened in notepad.
// Always returns TRUE.
BOOL WINAPI DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
	switch (ul_reason_for_call)
	{
	case DLL_PROCESS_ATTACH:
	{
		UpdateReplacements();
		if (trie.Empty())
		{
			{
				// Written as raw bytes: FF FE byte order mark, then two bytes per wchar_t
				std::ofstream file(REPLACE_SAVE_FILE, std::ios::binary);
				file << "\xff\xfe";
				for (auto ch : std::wstring_view(REPLACER_INSTRUCTIONS))
					// "\r\0\n" plus its terminating null is the 4 bytes 0D 00 0A 00, so '\n' is expanded to CR LF
					file << (ch == L'\n' ? std::string_view("\r\0\n", 4) : std::string_view(reinterpret_cast<const char*>(&ch), 2));
			} // stream is flushed and closed here, before notepad can open the file
			// nullptr (not NULL) terminates the variadic argument list with a real pointer value
			SpawnThread([] { _spawnlp(_P_DETACH, "notepad", "notepad", REPLACE_SAVE_FILE, nullptr); }); // show file to user
		}
	}
	break;
	case DLL_PROCESS_DETACH:
	{
	}
	break;
	}
	return TRUE;
}
bool ProcessSentence(std::wstring& sentence, SentenceInfo)
{
UpdateReplacements();
2019-01-24 21:32:21 +08:00
2021-06-05 16:58:53 +08:00
concurrency::reader_writer_lock::scoped_lock_read readLock(m);
2019-07-03 20:32:32 +08:00
sentence = trie.Replace(sentence);
return true;
2019-01-24 05:11:14 +08:00
}
2019-07-03 20:32:32 +08:00
// Self-test for Trie: builds a trie from an inline replacement script, runs Replace()
// on a sample sentence and asserts the exact result.
// NOTE(review): the date/time lines inside this block, and the empty |ORIG| fields in
// the script, look like extraction artifacts (blame timestamps, dropped non-ASCII
// text) - confirm against the upstream file before relying on this test.
TEST(
2019-01-24 05:11:14 +08:00
{
std::wstring replacementScript = LR"(
2019-07-03 20:32:32 +08:00
|ORIG||BECOMES|goodbye |END|Ignore this text
And this text 　　
2019-01-24 05:11:14 +08:00
|ORIG||BECOMES|idiot|END|
|ORIG| |BECOMES| hello|END||ORIG|delet^this|BECOMES||END|)";
// The wide string's storage is handed to the stream as raw bytes (size * sizeof(wchar_t))
Trie replacements(std::istringstream(std::string{ (const char*)replacementScript.c_str(), replacementScript.size() * sizeof(wchar_t) }));
2019-07-03 20:32:32 +08:00
std::wstring original = LR"(Don't replace this　
delete this)";
// Also exercises moving a Trie before using it
std::wstring replaced = Trie(std::move(replacements)).Replace(original);
2019-07-03 20:32:32 +08:00
assert(replaced == L"Don't replace thisgoodbye idiot hello");
2019-01-24 05:11:14 +08:00
}
);