Textractor/GUI/host/textthread.cpp

82 lines
2.6 KiB
C++
Raw Normal View History

#include "textthread.h"
#include "host.h"
#include "util.h"
2019-02-27 11:33:17 -05:00
extern const wchar_t* INVALID_CODEPAGE;
2018-12-13 22:44:55 -05:00
// Creates a text thread for the given thread/hook parameters.
// tp, hp: stored as-is in the corresponding members.
// name:   display name for the thread; when absent, the hook's own name (hp.name) is converted to a wide string and used instead.
// The handle is taken from the running threadCounter, which is incremented for every constructed TextThread.
TextThread::TextThread(ThreadParam tp, HookParam hp, std::optional<std::wstring> name) :
	handle(threadCounter++),
	// Only convert hp.name when no explicit name was supplied: optional::value_or evaluates its argument eagerly,
	// so the previous form did the conversion (and could throw from .value()) even when the result was discarded.
	// .value() still throws if the conversion fails and there is no supplied name to fall back on.
	name(name ? std::move(*name) : Util::StringToWideString(hp.name).value()),
	tp(tp),
	hp(hp)
{}
void TextThread::Start()
{
CreateTimerQueueTimer(&timer, NULL, [](void* This, BOOLEAN) { ((TextThread*)This)->Flush(); }, this, 10, 10, WT_EXECUTELONGFUNCTION);
}
2019-02-04 15:18:47 -05:00
void TextThread::Stop()
{
2019-02-04 15:18:47 -05:00
timer = NULL;
2018-07-18 16:18:43 -04:00
}
2019-02-09 00:30:38 -05:00
// Hands a finished sentence over to the queue that Flush() drains.
// sentence: moved from; the caller should not rely on its contents afterwards.
void TextThread::AddSentence(std::wstring&& sentence)
{
	queuedSentences->push_back(std::move(sentence));
}
2019-02-09 18:24:54 -05:00
// Decodes a chunk of captured bytes into the pending buffer and queues the buffer as a sentence
// when repetition filtering or a zero flush delay says it is complete.
// data:   the captured bytes.
// length: number of bytes in data; negative values are ignored.
void TextThread::Push(BYTE* data, int length)
{
	if (length < 0) return;
	std::scoped_lock lock(bufferMutex);

	// The hook's own codepage takes priority over the host-wide default. Kept lazy so it is only read where it is needed.
	const auto codepage = [this] { return hp.codepage ? hp.codepage : Host::defaultCodepage; };

	BYTE doubleByteChar[2];
	if (length == 1) // doublebyte characters must be processed as pairs
	{
		if (leadByte)
		{
			// A lead byte was held back by an earlier call: pair it with this byte and process both together.
			doubleByteChar[0] = leadByte;
			doubleByteChar[1] = data[0];
			data = doubleByteChar;
			length = 2;
			leadByte = 0;
		}
		else if (IsDBCSLeadByteEx(codepage(), data[0]))
		{
			// First half of a double-byte character: remember it and process nothing this time.
			leadByte = data[0];
			length = 0;
		}
	}

	if (hp.type & USING_UNICODE)
	{
		buffer.append(reinterpret_cast<wchar_t*>(data), length / sizeof(wchar_t));
	}
	else if (auto converted = Util::StringToWideString(std::string(reinterpret_cast<char*>(data), length), codepage()))
	{
		buffer.append(converted.value());
	}
	else
	{
		Host::AddConsoleOutput(INVALID_CODEPAGE);
	}
	lastPushTime = GetTickCount();

	if (filterRepetition)
	{
		// Text made up solely of characters from the last repeated sentence is discarded.
		const bool onlyRepeatedChars = std::all_of(buffer.begin(), buffer.end(), [this](auto ch) { return repeatingChars.count(ch) != 0; });
		if (onlyRepeatedChars) buffer.clear();
		if (Util::RemoveRepetition(buffer)) // sentence repetition detected, which means the entire sentence has already been received
		{
			repeatingChars = std::unordered_set(buffer.begin(), buffer.end());
			AddSentence(std::move(buffer));
			buffer.clear(); // bring the moved-from buffer back to a known empty state
		}
	}

	// With no flush delay, string hooks are queued immediately instead of waiting for Flush().
	if (flushDelay == 0 && (hp.type & USING_STRING))
	{
		AddSentence(std::move(buffer));
		buffer.clear();
	}
}
2018-12-13 22:44:55 -05:00
void TextThread::Flush()
2018-11-03 21:41:38 -04:00
{
2019-04-26 20:55:07 -04:00
if (storage->size() > 10'000'000) storage->erase(0, 8'000'000); // https://github.com/Artikash/Textractor/issues/127#issuecomment-486882983
std::deque<std::wstring> sentences;
queuedSentences->swap(sentences);
for (auto& sentence : sentences)
2019-03-27 23:35:22 -04:00
{
sentence.erase(std::remove(sentence.begin(), sentence.end(), L'\0'));
2019-02-04 15:18:47 -05:00
if (Output(*this, sentence)) storage->append(sentence);
2019-03-27 23:35:22 -04:00
}
2019-01-09 22:35:01 -05:00
std::scoped_lock lock(bufferMutex);
if (buffer.empty()) return;
2019-02-04 15:18:47 -05:00
if (buffer.size() > maxBufferSize || GetTickCount() - lastPushTime > flushDelay)
{
2019-02-09 00:30:38 -05:00
AddSentence(std::move(buffer));
2019-02-04 15:18:47 -05:00
buffer.clear();
}
}