extract repetition filter

This commit is contained in:
Akash Mozumdar 2018-11-22 16:02:45 -05:00
parent 9f85edf704
commit c105f6700f
4 changed files with 13 additions and 13 deletions

View File

@ -38,15 +38,6 @@ void TextThread::Push(const BYTE* data, int len)
lastPushTime = GetTickCount();
}
// Collapses a sentence that ends in a triple repetition down to a single copy of the repeated run
// (see https://github.com/Artikash/Textractor/issues/40).
//
// Looks, longest candidate first, for a run of more than 6 characters that occurs three times
// back to back at the very end of `sentence`. On a match, `sentence` is cut down to the final copy
// of that run (everything before it is dropped) and the search starts over on the shortened text.
//
// sentence: text to examine; modified in place when a repetition is found.
// Returns true if at least one repetition was removed, false if `sentence` was left untouched.
bool TextThread::FilterRepetition(std::wstring& sentence)
{
	bool removed = false;
	for (;;)
	{
		const size_t size = sentence.size();
		size_t runLength = 0;
		for (size_t len = size / 3; len > 6; --len)
		{
			// Start of the first of the three candidate runs at the tail of the sentence.
			const size_t first = size - len * 3;
			// Length-counted comparison: unlike wcsncmp, an embedded L'\0' does not end the comparison early.
			if (sentence.compare(first, len, sentence, first + len, len) == 0 &&
				sentence.compare(first, len, sentence, first + len * 2, len) == 0)
			{
				runLength = len;
				break;
			}
		}
		if (runLength == 0) return removed;
		// Keep only the last copy of the run; erase avoids assigning the string from a pointer into its own buffer.
		sentence.erase(0, size - runLength);
		removed = true;
	}
}
void TextThread::Flush()
{
std::wstring sentence;
@ -57,7 +48,7 @@ void TextThread::Flush()
sentence = buffer;
buffer.clear();
if (FilterRepetition(sentence)) repeatingChars = std::unordered_set(sentence.begin(), sentence.end());
if (Util::RemoveRepetition(sentence)) repeatingChars = std::unordered_set(sentence.begin(), sentence.end());
else repeatingChars.clear();
}
AddSentence(sentence);

View File

@ -30,15 +30,13 @@ public:
const HookParam hp;
private:
// see https://github.com/Artikash/Textractor/issues/40
static bool FilterRepetition(std::wstring& sentence);
void Flush();
std::wstring buffer;
std::unordered_set<wchar_t> repeatingChars;
std::mutex bufferMutex;
std::wstring storage;
std::recursive_mutex storageMutex;
std::mutex storageMutex;
HANDLE deletionEvent = CreateEventW(nullptr, FALSE, FALSE, NULL);
std::thread flushThread = std::thread([&] { while (WaitForSingleObject(deletionEvent, 10) == WAIT_TIMEOUT) Flush(); });

View File

@ -34,4 +34,13 @@ namespace Util
return L"";
}
}
// Collapses text that ends in a triple repetition down to a single copy of the repeated run
// (see https://github.com/Artikash/Textractor/issues/40).
//
// Looks, longest candidate first, for a run of more than 6 characters that occurs three times
// back to back at the very end of `text`. On a match, `text` is cut down to the final copy of
// that run (everything before it is dropped) and the search starts over on the shortened text.
//
// text: text to examine; modified in place when a repetition is found.
// Returns true if at least one repetition was removed, false if `text` was left untouched.
bool RemoveRepetition(std::wstring& text)
{
	bool removed = false;
	for (;;)
	{
		const size_t size = text.size();
		size_t runLength = 0;
		for (size_t len = size / 3; len > 6; --len)
		{
			// Start of the first of the three candidate runs at the tail of the text.
			const size_t first = size - len * 3;
			// Length-counted comparison: unlike wcsncmp, an embedded L'\0' does not end the comparison early.
			if (text.compare(first, len, text, first + len, len) == 0 &&
				text.compare(first, len, text, first + len * 2, len) == 0)
			{
				runLength = len;
				break;
			}
		}
		if (runLength == 0) return removed;
		// Keep only the last copy of the run; erase avoids assigning the string from a pointer into its own buffer.
		text.erase(0, size - runLength);
		removed = true;
	}
}
}

View File

@ -6,4 +6,6 @@ namespace Util
{
std::optional<std::wstring> GetClipboardText();
std::wstring StringToWideString(std::string text, UINT encoding = CP_UTF8);
// Strips a trailing triple repetition from text: when text ends with the same run of more than 6 characters
// three times in a row, text is reduced in place to a single copy of that run (repeated until none remains).
// Returns true if repetition was found (see https://github.com/Artikash/Textractor/issues/40)
bool RemoveRepetition(std::wstring& text);
}