forked from Public-Mirror/Textractor
Fix several performance issues in TextThread
This commit is contained in:
parent
22bb1420c1
commit
e489c38990
@ -44,20 +44,24 @@ void TextThread::Push(const BYTE* data, int len)
|
||||
|
||||
void TextThread::Flush()
|
||||
{
|
||||
std::wstring sentence;
|
||||
{
|
||||
LOCK(threadMutex);
|
||||
std::unique_lock locker(threadMutex);
|
||||
if (buffer.empty()) return;
|
||||
if (buffer.size() < maxBufferSize && GetTickCount() - lastPushTime < flushDelay) return;
|
||||
sentence = buffer;
|
||||
if (buffer.size() > maxBufferSize || GetTickCount() - lastPushTime > flushDelay)
|
||||
{
|
||||
std::wstring sentence = buffer;
|
||||
buffer.clear();
|
||||
|
||||
bool hasRepetition = false;
|
||||
for (std::wsmatch results; std::regex_search(sentence, results, std::wregex(L"([^\\x00]{6,})\\1\\1")); hasRepetition = true) sentence = results[1];
|
||||
if (hasRepetition) repeatingChars = std::unordered_set<wchar_t>(sentence.begin(), sentence.end());
|
||||
else repeatingChars.clear();
|
||||
}
|
||||
locker.unlock(); // This algorithm might take a while
|
||||
std::unordered_set<wchar_t> repeatingChars;
|
||||
for (std::wsmatch results; std::regex_search(sentence, results, std::wregex(L"([^\\x00]{6,})\\1\\1")); sentence = results[1])
|
||||
repeatingChars = std::unordered_set(sentence.begin(), sentence.end());
|
||||
locker.lock();
|
||||
|
||||
this->repeatingChars = repeatingChars;
|
||||
|
||||
locker.unlock();
|
||||
AddSentence(sentence);
|
||||
}
|
||||
}
|
||||
|
||||
// EOF
|
||||
|
@ -14,8 +14,8 @@ public:
|
||||
|
||||
inline static OutputCallback Output;
|
||||
|
||||
inline static int flushDelay = 250; // flush every 250ms by default
|
||||
inline static int maxBufferSize = 200;
|
||||
inline static int flushDelay = 400; // flush every 400ms by default
|
||||
inline static int maxBufferSize = 1000;
|
||||
inline static int threadCounter = 0;
|
||||
|
||||
TextThread(ThreadParam tp, HookParam hp, std::wstring name);
|
||||
|
Loading…
x
Reference in New Issue
Block a user