extract repetition filter
This commit is contained in:
parent
9f85edf704
commit
c105f6700f
@ -38,15 +38,6 @@ void TextThread::Push(const BYTE* data, int len)
|
||||
lastPushTime = GetTickCount();
|
||||
}
|
||||
|
||||
// Collapses a tripled suffix of sentence down to a single copy, in place
// (see https://github.com/Artikash/Textractor/issues/40).
// If the last 3 * len characters of sentence are the same len-character unit repeated
// three times (for some len of at least 7), everything except the final copy of that
// unit is discarded. Longer units are tried first, and the check is repeated on the
// shortened sentence until no tripled suffix remains.
// Returns true if any repetition was found and removed; otherwise returns false and
// leaves sentence untouched.
bool TextThread::FilterRepetition(std::wstring& sentence)
{
	// Smallest unit length that is treated as repetition (same threshold as before: len > 6).
	constexpr std::wstring::size_type MIN_UNIT_LENGTH = 7;

	bool removed = false;
	for (bool retry = true; retry; )
	{
		retry = false;
		const auto size = sentence.size();
		for (auto len = size / 3; len >= MIN_UNIT_LENGTH; --len)
		{
			const auto first = size - len * 3, second = size - len * 2, third = size - len;
			// Length-based compare: unlike wcsncmp, an embedded L'\0' cannot end the comparison early.
			if (sentence.compare(first, len, sentence, second, len) == 0 && sentence.compare(first, len, sentence, third, len) == 0)
			{
				// Keep only the last copy. erase() avoids re-assigning the string from a
				// C-string pointer into its own buffer (which also truncated at an embedded NUL).
				sentence.erase(0, third);
				removed = retry = true;
				break;
			}
		}
	}
	return removed;
}
|
||||
|
||||
void TextThread::Flush()
|
||||
{
|
||||
std::wstring sentence;
|
||||
@ -57,7 +48,7 @@ void TextThread::Flush()
|
||||
sentence = buffer;
|
||||
buffer.clear();
|
||||
|
||||
if (FilterRepetition(sentence)) repeatingChars = std::unordered_set(sentence.begin(), sentence.end());
|
||||
if (Util::RemoveRepetition(sentence)) repeatingChars = std::unordered_set(sentence.begin(), sentence.end());
|
||||
else repeatingChars.clear();
|
||||
}
|
||||
AddSentence(sentence);
|
||||
|
@ -30,15 +30,13 @@ public:
|
||||
const HookParam hp;
|
||||
|
||||
private:
|
||||
// see https://github.com/Artikash/Textractor/issues/40
|
||||
static bool FilterRepetition(std::wstring& sentence);
|
||||
void Flush();
|
||||
|
||||
std::wstring buffer;
|
||||
std::unordered_set<wchar_t> repeatingChars;
|
||||
std::mutex bufferMutex;
|
||||
std::wstring storage;
|
||||
std::recursive_mutex storageMutex;
|
||||
std::mutex storageMutex;
|
||||
|
||||
HANDLE deletionEvent = CreateEventW(nullptr, FALSE, FALSE, NULL);
|
||||
std::thread flushThread = std::thread([&] { while (WaitForSingleObject(deletionEvent, 10) == WAIT_TIMEOUT) Flush(); });
|
||||
|
@ -34,4 +34,13 @@ namespace Util
|
||||
return L"";
|
||||
}
|
||||
}
|
||||
|
||||
// Collapses a tripled suffix of text down to a single copy, in place.
// If the last 3 * len characters of text are the same len-character unit repeated
// three times (for some len of at least 7), everything except the final copy of that
// unit is discarded. Longer units are tried first, and the check is repeated on the
// shortened text until no tripled suffix remains.
// Returns true if any repetition was found and removed; otherwise returns false and
// leaves text untouched.
bool RemoveRepetition(std::wstring& text)
{
	// Smallest unit length that is treated as repetition (same threshold as before: len > 6).
	constexpr std::wstring::size_type MIN_UNIT_LENGTH = 7;

	bool removed = false;
	for (bool retry = true; retry; )
	{
		retry = false;
		const auto size = text.size();
		for (auto len = size / 3; len >= MIN_UNIT_LENGTH; --len)
		{
			const auto first = size - len * 3, second = size - len * 2, third = size - len;
			// Length-based compare: unlike wcsncmp, an embedded L'\0' cannot end the comparison early.
			if (text.compare(first, len, text, second, len) == 0 && text.compare(first, len, text, third, len) == 0)
			{
				// Keep only the last copy. erase() avoids re-assigning the string from a
				// C-string pointer into its own buffer (which also truncated at an embedded NUL).
				text.erase(0, third);
				removed = retry = true;
				break;
			}
		}
	}
	return removed;
}
|
||||
}
|
@ -6,4 +6,6 @@ namespace Util
|
||||
{
|
||||
std::optional<std::wstring> GetClipboardText();
|
||||
std::wstring StringToWideString(std::string text, UINT encoding = CP_UTF8);
|
||||
// Removes trailing repetition from text in place; returns true if repetition was found (see https://github.com/Artikash/Textractor/issues/40)
|
||||
bool RemoveRepetition(std::wstring& text);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user