mirror of
https://github.com/Artikash/Textractor.git
synced 2024-12-23 17:04:12 +08:00
add cyclic repetition detection in textthread
This commit is contained in:
parent
abba2d77e3
commit
3ad311293f
@ -6,6 +6,7 @@
|
|||||||
#include "host.h"
|
#include "host.h"
|
||||||
#include "const.h"
|
#include "const.h"
|
||||||
#include <regex>
|
#include <regex>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
// Constructs a TextThread from the given thread parameters and status flags.
// handle is set to the current value of threadCounter, which is then incremented;
// name is obtained from Host::GetHookName(tp.pid, tp.hook); tp and status are stored as passed in.
TextThread::TextThread(ThreadParam tp, DWORD status) : handle(threadCounter++), name(Host::GetHookName(tp.pid, tp.hook)), tp(tp), status(status) {}
|
// Constructs a TextThread from the given thread parameters and status flags.
// handle is set to the current value of threadCounter, which is then incremented;
// name is obtained from Host::GetHookName(tp.pid, tp.hook); tp and status are stored as passed in.
TextThread::TextThread(ThreadParam tp, DWORD status) : handle(threadCounter++), name(Host::GetHookName(tp.pid, tp.hook)), tp(tp), status(status) {}
|
||||||
|
|
||||||
@ -31,6 +32,11 @@ void TextThread::Flush()
|
|||||||
if (buffer.size() < maxBufferSize && GetTickCount() - timestamp < flushDelay) return;
|
if (buffer.size() < maxBufferSize && GetTickCount() - timestamp < flushDelay) return;
|
||||||
sentence = buffer;
|
sentence = buffer;
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
|
|
||||||
|
bool hasRepetition = false;
|
||||||
|
for (std::wsmatch results; std::regex_search(sentence, results, std::wregex(L"([^\\x00]{6,})\\1\\1")); hasRepetition = true) sentence = results[1];
|
||||||
|
if (hasRepetition) repeatingChars = std::unordered_set<wchar_t>(sentence.begin(), sentence.end());
|
||||||
|
else repeatingChars.clear();
|
||||||
}
|
}
|
||||||
AddSentence(sentence);
|
AddSentence(sentence);
|
||||||
}
|
}
|
||||||
@ -51,6 +57,7 @@ void TextThread::AddText(const BYTE* data, int len)
|
|||||||
buffer += status & USING_UNICODE
|
buffer += status & USING_UNICODE
|
||||||
? std::wstring((wchar_t*)data, len / 2)
|
? std::wstring((wchar_t*)data, len / 2)
|
||||||
: StringToWideString(std::string((char*)data, len), status & USING_UTF8 ? CP_UTF8 : SHIFT_JIS);
|
: StringToWideString(std::string((char*)data, len), status & USING_UTF8 ? CP_UTF8 : SHIFT_JIS);
|
||||||
|
if (std::all_of(buffer.begin(), buffer.end(), [&](wchar_t c) { return repeatingChars.count(c) > 0; })) buffer.clear();
|
||||||
timestamp = GetTickCount();
|
timestamp = GetTickCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
class TextThread
|
class TextThread
|
||||||
{
|
{
|
||||||
@ -34,6 +35,7 @@ private:
|
|||||||
|
|
||||||
std::wstring buffer;
|
std::wstring buffer;
|
||||||
std::wstring storage;
|
std::wstring storage;
|
||||||
|
std::unordered_set<wchar_t> repeatingChars;
|
||||||
std::recursive_mutex ttMutex;
|
std::recursive_mutex ttMutex;
|
||||||
DWORD status;
|
DWORD status;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user