2016-01-06 00:01:17 +09:00
|
|
|
#include "textthread.h"
|
2018-11-22 15:53:32 -05:00
|
|
|
#include "host.h"
|
2016-01-06 00:01:17 +09:00
|
|
|
|
2019-02-27 11:33:17 -05:00
|
|
|
// Message text defined in another translation unit; TextThread::Push sends it to Host::AddConsoleOutput
// when StringToWideString yields no value for the pushed bytes.
extern const wchar_t* INVALID_CODEPAGE;
|
|
|
|
|
2019-06-29 15:43:26 +05:30
|
|
|
// Collapses a sentence whose tail is three back-to-back copies of the same chunk
// (see https://github.com/Artikash/Textractor/issues/40).
//
// The longest candidate chunk (a third of the text) is tried first, shrinking one character
// at a time. Chunks of 6 characters or fewer are never treated as a repetition. When a match
// is found, text is replaced by a single copy of the chunk (everything before the final copy
// is discarded) and the search starts over on the shortened text.
//
// text: sentence to inspect; modified in place when a repetition is found.
// Returns true if at least one repetition was found and collapsed, false if text is untouched.
static bool RemoveRepetition(std::wstring& text)
{
	constexpr size_t MIN_CHUNK_LENGTH = 7;

	bool foundAny = false;
	for (bool collapsed = true; collapsed; )
	{
		collapsed = false;
		const size_t size = text.size();
		// size_t throughout: no narrowing of text.size(), and the loop stops at MIN_CHUNK_LENGTH so it cannot wrap
		for (size_t length = size / 3; length >= MIN_CHUNK_LENGTH; --length)
		{
			const size_t first = size - length * 3;
			const size_t second = size - length * 2;
			const size_t third = size - length;
			if (text.compare(first, length, text, second, length) == 0 && text.compare(first, length, text, third, length) == 0)
			{
				text.erase(0, third); // keep only the last copy of the chunk
				foundAny = collapsed = true;
				break;
			}
		}
	}
	return foundAny;
}
|
|
|
|
|
2018-12-13 22:44:55 -05:00
|
|
|
// Constructs a text thread for the given thread and hook parameters.
//
// tp:   thread parameters, stored as-is.
// hp:   hook parameters, stored as-is; hp.name supplies the fallback display name.
// name: optional display name; when empty, StringToWideString(hp.name) is used instead.
//
// handle takes the current value of threadCounter, which is then incremented.
TextThread::TextThread(ThreadParam tp, HookParam hp, std::optional<std::wstring> name) :
	handle(threadCounter++),
	// the parameter is a by-value sink, so move the string out of it instead of copying
	name(std::move(name).value_or(StringToWideString(hp.name))),
	tp(tp),
	hp(hp)
{}
|
|
|
|
|
|
|
|
void TextThread::Start()
|
2018-11-27 15:54:04 -05:00
|
|
|
{
|
2019-01-01 15:15:09 -05:00
|
|
|
CreateTimerQueueTimer(&timer, NULL, [](void* This, BOOLEAN) { ((TextThread*)This)->Flush(); }, this, 10, 10, WT_EXECUTELONGFUNCTION);
|
2018-11-27 15:54:04 -05:00
|
|
|
}
|
2016-01-06 00:01:17 +09:00
|
|
|
|
2019-02-04 15:18:47 -05:00
|
|
|
void TextThread::Stop()
|
2016-01-06 00:01:17 +09:00
|
|
|
{
|
2019-02-04 15:18:47 -05:00
|
|
|
timer = NULL;
|
2018-07-18 16:18:43 -04:00
|
|
|
}
|
|
|
|
|
2019-07-02 11:26:04 +05:30
|
|
|
void TextThread::AddSentence(std::wstring sentence)
|
2019-01-01 15:15:09 -05:00
|
|
|
{
|
2019-06-04 15:21:04 -04:00
|
|
|
queuedSentences->emplace_back(std::move(sentence));
|
2019-01-01 15:15:09 -05:00
|
|
|
}
|
|
|
|
|
2019-02-09 18:24:54 -05:00
|
|
|
// Decodes a chunk of raw data and appends the resulting text to the pending buffer.
//
// data:   bytes to decode.
// length: number of bytes in data; negative values are ignored.
//
// bufferMutex is held for the whole call. Steps:
//  - A single byte is checked for being half of a double-byte character: if a lead byte was saved
//    by an earlier call, the two are decoded together; otherwise, if the byte is a lead byte in the
//    effective codepage (hp.codepage, or Host::defaultCodepage when that is 0), it is saved and
//    nothing is decoded on this call.
//  - The bytes are decoded according to hp.type: HEX_DUMP formats each 16-bit word as four hex digits
//    plus a space, USING_UNICODE appends them as wchar_t, anything else is converted with
//    StringToWideString (INVALID_CODEPAGE is sent to the console output if that yields no value).
//  - With FULL_STRING a newline is appended. lastPushTime is set to the current tick count.
//  - With filterRepetition, a buffer consisting only of characters recorded in repeatingChars is
//    dropped, and a buffer in which RemoveRepetition finds a repetition is queued immediately.
//  - With a flushDelay of 0 and FULL_STRING the buffer is queued immediately.
void TextThread::Push(BYTE* data, int length)
{
	if (length < 0) return;
	std::scoped_lock lock(bufferMutex);

	const auto codepage = hp.codepage ? hp.codepage : Host::defaultCodepage;

	BYTE doubleByteChar[2];
	if (length == 1) // doublebyte characters must be processed as pairs
	{
		if (leadByte)
		{
			// second half of a pair: decode it together with the saved lead byte
			doubleByteChar[0] = leadByte;
			doubleByteChar[1] = data[0];
			data = doubleByteChar;
			length = 2;
			leadByte = 0;
		}
		else if (IsDBCSLeadByteEx(codepage, data[0]))
		{
			// first half of a pair: remember it and decode nothing this time
			leadByte = data[0];
			length = 0;
		}
	}

	if (hp.type & HEX_DUMP)
	{
		constexpr int WORD_SIZE = sizeof(short);
		for (int i = 0; i < length; i += WORD_SIZE)
		{
			// Copy only the bytes that are actually there. An odd length leaves a lone final byte;
			// reading a whole short at that position would run one byte past the input, so the
			// missing byte is left as zero instead.
			short word = 0;
			const size_t available = static_cast<size_t>(length - i);
			memcpy(&word, data + i, available < sizeof(short) ? available : sizeof(short));
			buffer.append(FormatString(L"%04hX ", word));
		}
	}
	else if (hp.type & USING_UNICODE)
	{
		buffer.append(reinterpret_cast<const wchar_t*>(data), length / sizeof(wchar_t));
	}
	else if (auto converted = StringToWideString(std::string(reinterpret_cast<const char*>(data), length), codepage))
	{
		buffer.append(converted.value());
	}
	else
	{
		Host::AddConsoleOutput(INVALID_CODEPAGE);
	}

	if (hp.type & FULL_STRING) buffer.push_back(L'\n');
	lastPushTime = GetTickCount();

	if (filterRepetition)
	{
		const bool onlyRepeatingChars = std::all_of(buffer.begin(), buffer.end(), [&](wchar_t ch) { return repeatingChars.find(ch) != repeatingChars.end(); });
		if (onlyRepeatingChars) buffer.clear();
		if (RemoveRepetition(buffer)) // sentence repetition detected, which means the entire sentence has already been received
		{
			repeatingChars = std::unordered_set(buffer.begin(), buffer.end());
			AddSentence(std::move(buffer));
			buffer.clear(); // put the moved-from buffer back into a known empty state
		}
	}

	if (flushDelay == 0 && (hp.type & FULL_STRING))
	{
		AddSentence(std::move(buffer));
		buffer.clear(); // put the moved-from buffer back into a known empty state
	}
}
|
|
|
|
|
2020-04-25 20:34:53 -06:00
|
|
|
void TextThread::Push(const wchar_t* data)
|
|
|
|
{
|
|
|
|
std::scoped_lock lock(bufferMutex);
|
|
|
|
// not sure if this should filter repetition
|
|
|
|
lastPushTime = GetTickCount();
|
|
|
|
buffer += data;
|
|
|
|
}
|
|
|
|
|
2018-12-13 22:44:55 -05:00
|
|
|
void TextThread::Flush()
|
2018-11-03 21:41:38 -04:00
|
|
|
{
|
2019-08-19 15:58:53 -04:00
|
|
|
{
|
|
|
|
auto storage = this->storage.Acquire();
|
|
|
|
if (storage->size() > maxHistorySize) storage->erase(0, storage->size() - maxHistorySize); // https://github.com/Artikash/Textractor/issues/127#issuecomment-486882983
|
|
|
|
}
|
2019-04-26 20:55:07 -04:00
|
|
|
|
2020-03-02 23:38:51 -07:00
|
|
|
std::vector<std::wstring> sentences;
|
2019-06-04 15:21:04 -04:00
|
|
|
queuedSentences->swap(sentences);
|
2019-09-30 08:45:01 -04:00
|
|
|
int totalSize = 0;
|
2019-06-04 15:21:04 -04:00
|
|
|
for (auto& sentence : sentences)
|
2019-03-27 23:35:22 -04:00
|
|
|
{
|
2019-09-30 08:45:01 -04:00
|
|
|
totalSize += sentence.size();
|
2020-03-05 02:23:53 -07:00
|
|
|
sentence.erase(std::remove(sentence.begin(), sentence.end(), L'\0'), sentence.end());
|
2019-02-04 15:18:47 -05:00
|
|
|
if (Output(*this, sentence)) storage->append(sentence);
|
2019-03-27 23:35:22 -04:00
|
|
|
}
|
2018-12-02 15:30:35 -05:00
|
|
|
|
2019-01-09 22:35:01 -05:00
|
|
|
std::scoped_lock lock(bufferMutex);
|
2019-01-01 15:15:09 -05:00
|
|
|
if (buffer.empty()) return;
|
2019-02-04 15:18:47 -05:00
|
|
|
if (buffer.size() > maxBufferSize || GetTickCount() - lastPushTime > flushDelay)
|
|
|
|
{
|
2019-02-09 00:30:38 -05:00
|
|
|
AddSentence(std::move(buffer));
|
2019-02-04 15:18:47 -05:00
|
|
|
buffer.clear();
|
|
|
|
}
|
2018-07-11 20:18:04 -04:00
|
|
|
}
|