forked from Public-Mirror/Textractor
extract repetition filter
This commit is contained in:
parent
9f85edf704
commit
c105f6700f
@ -38,15 +38,6 @@ void TextThread::Push(const BYTE* data, int len)
|
|||||||
lastPushTime = GetTickCount();
|
lastPushTime = GetTickCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collapses a sentence that ends in three identical back-to-back chunks down to one chunk.
// Only chunks longer than 6 characters count as repetition. The longest candidate chunk is
// tried first, and the surviving chunk is filtered again in case it is itself a repetition.
// Returns true if any repetition was found (sentence is then modified), false otherwise
// (sentence is left untouched).
// see https://github.com/Artikash/Textractor/issues/40
bool TextThread::FilterRepetition(std::wstring& sentence)
{
	const size_t size = sentence.size();
	// len * 3 <= size always holds, so the offsets below never underflow.
	for (size_t len = size / 3; len > 6; --len)
	{
		const size_t first = size - len * 3;
		// Length-aware comparisons: unlike wcsncmp, these do not stop at an embedded L'\0'.
		if (sentence.compare(first, len, sentence, first + len, len) == 0 &&
			sentence.compare(first, len, sentence, first + len * 2, len) == 0)
		{
			// Keep only the final chunk; erasing in place avoids assigning the string from a pointer into itself.
			sentence.erase(0, size - len);
			FilterRepetition(sentence);
			return true;
		}
	}
	return false;
}
|
|
||||||
|
|
||||||
void TextThread::Flush()
|
void TextThread::Flush()
|
||||||
{
|
{
|
||||||
std::wstring sentence;
|
std::wstring sentence;
|
||||||
@ -57,7 +48,7 @@ void TextThread::Flush()
|
|||||||
sentence = buffer;
|
sentence = buffer;
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
|
|
||||||
if (FilterRepetition(sentence)) repeatingChars = std::unordered_set(sentence.begin(), sentence.end());
|
if (Util::RemoveRepetition(sentence)) repeatingChars = std::unordered_set(sentence.begin(), sentence.end());
|
||||||
else repeatingChars.clear();
|
else repeatingChars.clear();
|
||||||
}
|
}
|
||||||
AddSentence(sentence);
|
AddSentence(sentence);
|
||||||
|
@ -30,15 +30,13 @@ public:
|
|||||||
const HookParam hp;
|
const HookParam hp;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// see https://github.com/Artikash/Textractor/issues/40
|
|
||||||
static bool FilterRepetition(std::wstring& sentence);
|
|
||||||
void Flush();
|
void Flush();
|
||||||
|
|
||||||
std::wstring buffer;
|
std::wstring buffer;
|
||||||
std::unordered_set<wchar_t> repeatingChars;
|
std::unordered_set<wchar_t> repeatingChars;
|
||||||
std::mutex bufferMutex;
|
std::mutex bufferMutex;
|
||||||
std::wstring storage;
|
std::wstring storage;
|
||||||
std::recursive_mutex storageMutex;
|
std::mutex storageMutex;
|
||||||
|
|
||||||
HANDLE deletionEvent = CreateEventW(nullptr, FALSE, FALSE, NULL);
|
HANDLE deletionEvent = CreateEventW(nullptr, FALSE, FALSE, NULL);
|
||||||
std::thread flushThread = std::thread([&] { while (WaitForSingleObject(deletionEvent, 10) == WAIT_TIMEOUT) Flush(); });
|
std::thread flushThread = std::thread([&] { while (WaitForSingleObject(deletionEvent, 10) == WAIT_TIMEOUT) Flush(); });
|
||||||
|
@ -34,4 +34,13 @@ namespace Util
|
|||||||
return L"";
|
return L"";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collapses text that ends in three identical back-to-back chunks down to a single chunk.
// Only chunks longer than 6 characters count as repetition. The longest candidate chunk is
// tried first, and the surviving chunk is processed again in case it is itself a repetition.
// Returns true if any repetition was found (text is then modified), false otherwise
// (text is left untouched).
bool RemoveRepetition(std::wstring& text)
{
	const size_t size = text.size();
	// len * 3 <= size always holds, so the offsets below never underflow.
	for (size_t len = size / 3; len > 6; --len)
	{
		const size_t first = size - len * 3;
		// Length-aware comparisons: unlike wcsncmp, these do not stop at an embedded L'\0'.
		if (text.compare(first, len, text, first + len, len) == 0 &&
			text.compare(first, len, text, first + len * 2, len) == 0)
		{
			// Keep only the final chunk; erasing in place avoids assigning the string from a pointer into itself.
			text.erase(0, size - len);
			RemoveRepetition(text);
			return true;
		}
	}
	return false;
}
|
||||||
}
|
}
|
@ -6,4 +6,6 @@ namespace Util
|
|||||||
{
|
{
|
||||||
std::optional<std::wstring> GetClipboardText();
|
std::optional<std::wstring> GetClipboardText();
|
||||||
std::wstring StringToWideString(std::string text, UINT encoding = CP_UTF8);
|
std::wstring StringToWideString(std::string text, UINT encoding = CP_UTF8);
|
||||||
|
// Collapses text ending in three identical consecutive chunks down to one chunk; returns true if repetition was found (see https://github.com/Artikash/Textractor/issues/40)
|
||||||
|
bool RemoveRepetition(std::wstring& text);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user