From 523008d7e021ad56e35776e99dcfc132884b37ab Mon Sep 17 00:00:00 2001 From: Akash Mozumdar Date: Fri, 16 Nov 2018 08:34:15 -0500 Subject: [PATCH] ok, finally have a good repetition filter --- GUI/host/host.h | 2 +- GUI/host/textthread.cc | 14 +++++++++++--- GUI/host/textthread.h | 2 ++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/GUI/host/host.h b/GUI/host/host.h index 43994e8..f817b3e 100644 --- a/GUI/host/host.h +++ b/GUI/host/host.h @@ -34,7 +34,7 @@ namespace Host inline std::wstring StringToWideString(const std::string& text, UINT encoding = CP_UTF8) { std::wstring ret(text.size() + 1, 0); - if (int len = MultiByteToWideChar(encoding, 0, text.c_str(), -1, ret.data(), ret.capacity())) + if (int len = MultiByteToWideChar(encoding, 0, text.c_str(), -1, ret.data(), ret.size())) /* the output buffer length passed is now ret.size(), the element count ret was constructed with, instead of ret.capacity() */ { ret.resize(len - 1); return ret; diff --git a/GUI/host/textthread.cc b/GUI/host/textthread.cc index 86c0465..36b9670 100644 --- a/GUI/host/textthread.cc +++ b/GUI/host/textthread.cc @@ -42,6 +42,15 @@ void TextThread::Push(const BYTE* data, int len) lastPushTime = GetTickCount(); } +bool TextThread::FilterRepetition(std::wstring& sentence) /* If sentence ends with the same run of more than 6 characters three times in a row, replaces sentence with the text starting at the final copy of that run, re-checks the shortened sentence recursively and returns true; otherwise leaves sentence unchanged and returns false. */ +{ + wchar_t* end = sentence.data() + sentence.size(); /* one past the last character of sentence */ + for (int len = sentence.size() / 3; len > 6; --len) /* candidate run length, longest first. NOTE(review): the regex this patch removes from Flush() used {6,}, i.e. 6 or more, while len > 6 requires at least 7; confirm this is intended */ + if (wcsncmp(end - len * 3, end - len * 2, len) == 0 && wcsncmp(end - len * 3, end - len * 1, len) == 0) /* first copy equals both the second and the third copy */ + return true | FilterRepetition(sentence = end - len); /* sentence is assigned from a pointer into its own buffer; the result is always true on this path */ + return false; +} + void TextThread::Flush() { std::unique_lock locker(threadMutex); @@ -50,10 +59,9 @@ void TextThread::Flush() { std::wstring sentence = buffer; buffer.clear(); - repeatingChars.clear(); - for (std::wsmatch results; std::regex_search(sentence, results, std::wregex(L"^([^]{6,})\\1\\1")); sentence = results[1]) - repeatingChars = std::unordered_set(sentence.begin(), sentence.end()); + if (FilterRepetition(sentence)) repeatingChars = std::unordered_set(sentence.begin(), sentence.end()); /* a repetition was removed: record the characters of the shortened sentence */ + else repeatingChars.clear(); /* no repetition was found */ locker.unlock();
AddSentence(sentence); diff --git a/GUI/host/textthread.h b/GUI/host/textthread.h index dcfb86d..1d3ea1f 100644 --- a/GUI/host/textthread.h +++ b/GUI/host/textthread.h @@ -32,6 +32,8 @@ public: const HookParam hp; private: + // Trims a sentence that ends in a run of more than 6 characters repeated three times down to the final copy of that run; returns true if anything was trimmed. See https://github.com/Artikash/Textractor/issues/40 + static bool FilterRepetition(std::wstring& sentence); void Flush(); std::wstring buffer;