// Textractor_test/extensions/replacer.cpp
// (139 lines, 4.0 KiB, C++)

#include "extension.h"
#include <process.h>
#include <cwctype>
#include <filesystem>
#include <fstream>
#include <iterator>
// Text written to the save file when it contains no usable replacements; defined elsewhere.
extern const wchar_t* REPLACER_INSTRUCTIONS;

// Relative path of the file holding the user's replacement script.
constexpr auto REPLACE_SAVE_FILE = u8"SavedReplacements.txt";

// Modification time of the save file observed by the most recent reload check in ProcessSentence.
std::atomic<std::filesystem::file_time_type> replaceFileLastWrite = {};

// Guards the global `trie`: held exclusively while it is rebuilt, shared while it is queried.
std::shared_mutex m;

class Trie
2019-01-24 05:11:14 +08:00
{
public:
2019-07-03 20:32:32 +08:00
Trie(const std::unordered_map<std::wstring, std::wstring>& replacements)
2019-01-24 05:11:14 +08:00
{
2019-07-03 20:32:32 +08:00
for (const auto& [original, replacement] : replacements)
{
Node* current = &root;
for (auto ch : original)
if (Ignore(ch));
else if (auto& next = current->next[ch]) current = next.get();
else current = (next = std::make_unique<Node>()).get();
if (current != &root) current->value = replacement;
}
2019-01-24 05:11:14 +08:00
}
2019-07-03 20:32:32 +08:00
std::wstring Replace(const std::wstring& sentence) const
2019-01-24 05:11:14 +08:00
{
2019-07-03 20:32:32 +08:00
std::wstring result;
for (int i = 0; i < sentence.size();)
{
2019-07-03 20:32:32 +08:00
std::wstring replacement(1, sentence[i]);
int originalLength = 1;
const Node* current = &root;
for (int j = i; j < sentence.size() + 1; ++j)
{
2019-07-03 20:32:32 +08:00
if (current->value)
{
replacement = current->value.value();
originalLength = j - i;
}
if (current->next.count(sentence[j]) > 0) current = current->next.at(sentence[j]).get();
else if (Ignore(sentence[j]));
else break;
}
2019-07-03 20:32:32 +08:00
result += replacement;
i += originalLength;
}
return result;
2019-01-24 05:11:14 +08:00
}
private:
2019-02-18 08:14:49 +08:00
static bool Ignore(wchar_t ch)
2019-01-24 05:11:14 +08:00
{
2019-02-18 08:14:49 +08:00
return ch <= 0x20 || std::iswspace(ch);
2019-01-24 05:11:14 +08:00
}
struct Node
{
2019-08-08 02:05:50 +08:00
std::unordered_map<wchar_t, std::unique_ptr<Node>, Identity<wchar_t>> next;
2019-02-21 11:14:32 +08:00
std::optional<std::wstring> value;
2019-01-24 05:11:14 +08:00
} root;
2019-07-03 20:32:32 +08:00
} trie = { {} };
2019-01-24 05:11:14 +08:00
2019-07-03 20:32:32 +08:00
/// Extracts the replacement table from a replacement script.
///
/// The script is a sequence of entries of the form `|ORIG|original|BECOMES|replacement|END|`.
/// Text outside entries is ignored. Parsing stops at the first incomplete entry. If the same
/// original appears more than once, the last entry wins.
///
/// Every delimiter is searched for strictly after the previous one, so delimiters can never share
/// a character (e.g. `|ORIG|BECOMES|`); such input is treated as an incomplete entry instead of
/// underflowing the substring lengths.
///
/// @param replacementScript  full text of the script
/// @return map from original text to replacement text (either may be empty)
std::unordered_map<std::wstring, std::wstring> Parse(const std::wstring& replacementScript)
{
    constexpr wchar_t ORIG_TAG[] = L"|ORIG|";
    constexpr wchar_t BECOMES_TAG[] = L"|BECOMES|";
    constexpr wchar_t END_TAG[] = L"|END|";
    // std::size counts the terminating null, hence the -1.
    constexpr size_t ORIG_LENGTH = std::size(ORIG_TAG) - 1;
    constexpr size_t BECOMES_LENGTH = std::size(BECOMES_TAG) - 1;
    constexpr size_t END_LENGTH = std::size(END_TAG) - 1;

    std::unordered_map<std::wstring, std::wstring> replacements;
    size_t cursor = 0;
    while (true)
    {
        const size_t original = replacementScript.find(ORIG_TAG, cursor);
        if (original == std::wstring::npos) break;
        const size_t originalStart = original + ORIG_LENGTH;

        const size_t becomes = replacementScript.find(BECOMES_TAG, originalStart);
        if (becomes == std::wstring::npos) break;
        const size_t replacementStart = becomes + BECOMES_LENGTH;

        const size_t end = replacementScript.find(END_TAG, replacementStart);
        if (end == std::wstring::npos) break;

        replacements[replacementScript.substr(originalStart, becomes - originalStart)] =
            replacementScript.substr(replacementStart, end - replacementStart);
        cursor = end + END_LENGTH;
    }
    return replacements;
}
/// DLL entry point.
///
/// On process attach, reads the save file and parses it. If it yields no replacements (including
/// when the file is missing or empty), the file is overwritten with REPLACER_INSTRUCTIONS and
/// opened in Notepad so the user can fill it in. Nothing is done on detach. Always returns TRUE.
BOOL WINAPI DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
    switch (ul_reason_for_call)
    {
    case DLL_PROCESS_ATTACH:
    {
        std::vector<BYTE> file;
        {
            // Scoped so the read handle is closed before the file may be rewritten below.
            std::ifstream saved(REPLACE_SAVE_FILE, std::ios::in | std::ios::binary);
            file.assign(std::istreambuf_iterator<char>{ saved }, std::istreambuf_iterator<char>{});
        }
        // The file's bytes are reinterpreted directly as wchar_t; a trailing odd byte is dropped.
        const std::wstring script(reinterpret_cast<const wchar_t*>(file.data()), file.size() / sizeof(wchar_t));
        if (Parse(script).empty())
        {
            std::ofstream(REPLACE_SAVE_FILE, std::ios::out | std::ios::binary | std::ios::trunc)
                .write(reinterpret_cast<const char*>(REPLACER_INSTRUCTIONS), wcslen(REPLACER_INSTRUCTIONS) * sizeof(wchar_t));
            // The variadic argument list must end with a null *pointer*; NULL is a plain integer 0 in C++.
            _spawnlp(_P_DETACH, "notepad", "notepad", REPLACE_SAVE_FILE, nullptr); // show file to user
        }
    }
    break;
    case DLL_PROCESS_DETACH:
    {
    }
    break;
    }
    return TRUE;
}
bool ProcessSentence(std::wstring& sentence, SentenceInfo)
{
2019-07-03 20:32:32 +08:00
try
2019-01-24 21:32:21 +08:00
{
2019-07-03 20:32:32 +08:00
static_assert(std::has_unique_object_representations_v<decltype(replaceFileLastWrite)::value_type>);
if (replaceFileLastWrite.exchange(std::filesystem::last_write_time(REPLACE_SAVE_FILE)) != std::filesystem::last_write_time(REPLACE_SAVE_FILE))
{
std::scoped_lock l(m);
std::vector<BYTE> file(std::istreambuf_iterator<char>(std::ifstream(REPLACE_SAVE_FILE, std::ios::in | std::ios::binary)), {});
trie = Trie(Parse(std::wstring((wchar_t*)file.data(), file.size() / sizeof(wchar_t))));
}
2019-01-24 21:32:21 +08:00
}
2019-07-03 20:32:32 +08:00
catch (std::filesystem::filesystem_error) {}
2019-01-24 21:32:21 +08:00
2019-07-03 20:32:32 +08:00
std::shared_lock l(m);
sentence = trie.Replace(sentence);
return true;
2019-01-24 05:11:14 +08:00
}
2019-07-03 20:32:32 +08:00
TEST(
    {
        // Covers: text between entries being ignored, back-to-back entries, an empty replacement,
        // whitespace inside originals, and whitespace around matches in the input.
        // NOTE(review): the non-ASCII originals below were missing from the copy this was edited
        // from and have been restored by hand so that both assertions hold - confirm against upstream.
        auto replacements = Parse(LR"(
|ORIG|さよなら|BECOMES|goodbye |END|Ignore this text
And this text   
|ORIG|バカ|BECOMES|idiot|END|
|ORIG|こんにちは |BECOMES| hello|END||ORIG|delete this|BECOMES||END|)");
        assert(replacements.size() == 4);
        std::wstring original = LR"(Don't replace this 
 さよなら バカ こんにちは delete this)";
        std::wstring replaced = Trie(replacements).Replace(original);
        // Whitespace adjacent to each match is consumed with it, hence "thisgoodbye".
        assert(replaced == L"Don't replace thisgoodbye idiot hello");
    }
);