2024-02-07 20:59:24 +08:00
|
|
|
#include "textthread.h"
|
|
|
|
#include "host.h"
|
2024-10-03 14:53:59 +08:00
|
|
|
#include "Lang/Lang.h"
|
2024-02-07 20:59:24 +08:00
|
|
|
|
|
|
|
// Collapses a buffer whose tail consists of the same run of characters three times in a row
// (see https://github.com/Artikash/Textractor/issues/40).
//
// Candidate run lengths are tried from text.size() / 3 down to 7 characters; runs of 6 or
// fewer characters are never treated as repetition. When the last three runs of a candidate
// length are identical, `text` is trimmed in place to the final run and scanned again, so
// nested repetition collapses as well.
//
// text: buffer to inspect; modified only when repetition is found.
// Returns true if at least one repetition was removed, false otherwise.
static bool RemoveRepetition(std::wstring &text)
{
    bool found = false;
    for (bool rescan = true; rescan;)
    {
        rescan = false;
        // size_t throughout: the lengths feed pointer offsets and must not be narrowed to int.
        const size_t size = text.size();
        for (size_t length = size / 3; length > 6; --length)
        {
            // `first` is the earliest of the three trailing runs; length * 3 <= size always holds.
            const wchar_t *first = text.data() + (size - length * 3);
            const size_t bytes = length * sizeof(wchar_t);
            if (memcmp(first, first + length, bytes) == 0 && memcmp(first, first + length * 2, bytes) == 0)
            {
                // Keep only the last run; erasing in place avoids building a temporary string
                // from the buffer that is about to be overwritten.
                text.erase(0, size - length);
                found = rescan = true;
                break;
            }
        }
    }
    return found;
}
|
|
|
|
|
2024-10-03 14:53:59 +08:00
|
|
|
// Constructs a text thread record.
//
// tp:   thread parameters, stored as-is.
// hp:   hook parameters, stored as-is.
// name: optional display name; when absent, hp.name converted with StringToWideString is used
//       (the conversion is evaluated in either case because it is passed to value_or).
//
// The handle is taken from threadCounter, which is post-incremented for the next thread.
TextThread::TextThread(ThreadParam tp, HookParam hp, std::optional<std::wstring> name)
    : handle(threadCounter++),
      name(name.value_or(StringToWideString(hp.name))),
      tp(tp),
      hp(hp)
{
}
|
2024-02-07 20:59:24 +08:00
|
|
|
|
|
|
|
void TextThread::Start()
|
|
|
|
{
|
2024-10-03 14:53:59 +08:00
|
|
|
CreateTimerQueueTimer(&timer, NULL, [](void *This, auto)
|
|
|
|
{ ((TextThread *)This)->Flush(); }, this, 10, 10, WT_EXECUTELONGFUNCTION);
|
2024-02-07 20:59:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void TextThread::Stop()
|
|
|
|
{
|
|
|
|
timer = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
void TextThread::AddSentence(std::wstring sentence)
|
|
|
|
{
|
|
|
|
queuedSentences->emplace_back(std::move(sentence));
|
|
|
|
}
|
|
|
|
|
2024-10-03 14:53:59 +08:00
|
|
|
// Converts raw hook bytes to text and appends them to the pending buffer.
//
// data:   bytes captured by the hook.
// length: number of bytes in `data`; negative values are ignored.
//
// Holds bufferMutex for the whole call. After conversion the flush timestamp is refreshed,
// optional repetition filtering runs, and the buffer is queued immediately when flushDelay
// is zero and the hook has FULL_STRING set.
void TextThread::Push(BYTE *data, int length)
{
    // Checked before the lock is taken.
    if (length < 0)
        return;

    std::scoped_lock lock(bufferMutex);

    // Double-byte characters must be processed as pairs: a lone byte either completes the
    // lead byte saved by an earlier call or is saved as a new lead byte.
    BYTE doubleByteChar[2];
    if (length == 1)
    {
        if (leadByte)
        {
            doubleByteChar[0] = leadByte;
            doubleByteChar[1] = data[0];
            leadByte = 0;
            data = doubleByteChar;
            length = 2;
        }
        else
        {
            const auto codepage = hp.codepage ? hp.codepage : Host::defaultCodepage;
            if (IsDBCSLeadByteEx(codepage, data[0]))
            {
                // Hold the lead byte back; the conversion below is called with length 0.
                leadByte = data[0];
                length = 0;
            }
        }
    }

    auto converted = commonparsestring(data, length, &hp, Host::defaultCodepage);
    if (!converted)
    {
        Host::AddConsoleOutput(INVALID_CODEPAGE);
    }
    else
    {
        const auto &text = converted.value();
        buffer.append(text);
        // FULL_STRING hooks get a line break after each chunk longer than one character.
        if ((hp.type & FULL_STRING) && text.size() > 1)
            buffer.push_back(L'\n');
    }

    UpdateFlushTime();

    if (filterRepetition)
    {
        // Drop a buffer made up entirely of characters from the last repeated sentence.
        const bool onlyRepeatedChars = std::all_of(buffer.begin(), buffer.end(), [&](wchar_t ch)
                                                   { return repeatingChars.count(ch) != 0; });
        if (onlyRepeatedChars)
            buffer.clear();

        // Sentence repetition detected, which means the entire sentence has already been received.
        if (RemoveRepetition(buffer))
        {
            repeatingChars = std::unordered_set(buffer.begin(), buffer.end());
            AddSentence(std::move(buffer));
            buffer.clear();
        }
    }

    if (flushDelay == 0 && (hp.type & FULL_STRING))
    {
        AddSentence(std::move(buffer));
        buffer.clear();
    }
}
|
2024-11-03 21:19:59 +08:00
|
|
|
void TextThread::UpdateFlushTime(bool recursive)
|
|
|
|
{
|
|
|
|
lastPushTime = GetTickCount64();
|
|
|
|
if (!recursive)
|
|
|
|
return;
|
|
|
|
auto&& ths = syncThreads.Acquire().contents;
|
|
|
|
if (ths.find(this) == ths.end())
|
|
|
|
return;
|
|
|
|
for (auto t : ths)
|
|
|
|
{
|
|
|
|
if (t == this)
|
|
|
|
continue;
|
|
|
|
t->UpdateFlushTime(false);
|
|
|
|
}
|
|
|
|
}
|
2024-10-03 14:53:59 +08:00
|
|
|
void TextThread::Push(const wchar_t *data)
|
2024-02-07 20:59:24 +08:00
|
|
|
{
|
|
|
|
std::scoped_lock lock(bufferMutex);
|
|
|
|
// not sure if this should filter repetition
|
2024-11-03 21:19:59 +08:00
|
|
|
UpdateFlushTime();
|
2024-02-07 20:59:24 +08:00
|
|
|
buffer += data;
|
|
|
|
}
|
|
|
|
|
|
|
|
void TextThread::Flush()
|
|
|
|
{
|
|
|
|
{
|
|
|
|
auto storage = this->storage.Acquire();
|
2024-10-03 14:53:59 +08:00
|
|
|
if (storage->size() > maxHistorySize)
|
|
|
|
storage->erase(0, storage->size() - maxHistorySize); // https://github.com/Artikash/Textractor/issues/127#issuecomment-486882983
|
2024-02-07 20:59:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<std::wstring> sentences;
|
|
|
|
queuedSentences->swap(sentences);
|
2024-10-03 14:53:59 +08:00
|
|
|
for (auto &sentence : sentences)
|
2024-02-07 20:59:24 +08:00
|
|
|
{
|
|
|
|
sentence.erase(std::remove(sentence.begin(), sentence.end(), 0), sentence.end());
|
2024-10-03 14:53:59 +08:00
|
|
|
if (Output(*this, sentence))
|
|
|
|
storage->append(sentence + L"\n");
|
2024-02-07 20:59:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
std::scoped_lock lock(bufferMutex);
|
2024-10-03 14:53:59 +08:00
|
|
|
if (buffer.empty())
|
|
|
|
return;
|
2024-02-07 20:59:24 +08:00
|
|
|
if (buffer.size() > maxBufferSize || GetTickCount64() - lastPushTime > flushDelay)
|
|
|
|
{
|
|
|
|
AddSentence(std::move(buffer));
|
|
|
|
buffer.clear();
|
|
|
|
}
|
|
|
|
}
|