2016-01-05 23:01:17 +08:00
# include "textthread.h"
2018-11-23 04:53:32 +08:00
# include "host.h"
2016-01-05 23:01:17 +08:00
2019-02-28 00:33:17 +08:00
// Defined in another translation unit. Push() passes it to Host::AddConsoleOutput when StringToWideString yields no value for the hook's codepage.
extern const wchar_t* INVALID_CODEPAGE;
2019-06-29 18:13:26 +08:00
// Collapses text that ends with three back-to-back copies of the same run (run longer than 6 characters)
// down to a single copy of that run, and keeps collapsing until no such triple remains.
// Longer runs are tried before shorter ones.
// Returns true if any repetition was found (see https://github.com/Artikash/Textractor/issues/40)
static bool RemoveRepetition(std::wstring& text)
{
	bool foundAny = false;
	for (bool collapsed = true; collapsed; )
	{
		collapsed = false;
		const wchar_t* tail = text.data() + text.size();
		for (int unit = static_cast<int>(text.size() / 3); unit > 6; --unit)
		{
			const size_t unitBytes = unit * sizeof(wchar_t);
			const wchar_t* first = tail - unit * 3;
			const wchar_t* second = tail - unit * 2;
			const wchar_t* third = tail - unit;
			if (memcmp(first, second, unitBytes) != 0) continue;
			if (memcmp(first, third, unitBytes) != 0) continue;
			// the temporary is fully built from the old contents before text is overwritten
			text = std::wstring(third, unit);
			foundAny = collapsed = true;
			break; // rescan the shortened text from its own longest candidate
		}
	}
	return foundAny;
}
2018-12-14 11:44:55 +08:00
// Creates a text thread for the given thread and hook parameters.
// handle: taken from threadCounter, which is post-incremented on every construction
// name:   the supplied name if present, otherwise hp.name converted with StringToWideString
TextThread::TextThread(ThreadParam tp, HookParam hp, std::optional<std::wstring> name) :
	handle(threadCounter++),
	name(name.value_or(StringToWideString(hp.name))),
	tp(tp),
	hp(hp)
{
}
void TextThread : : Start ( )
2018-11-28 04:54:04 +08:00
{
2021-01-15 21:07:23 +08:00
CreateTimerQueueTimer ( & timer , NULL , [ ] ( void * This , auto ) { ( ( TextThread * ) This ) - > Flush ( ) ; } , this , 10 , 10 , WT_EXECUTELONGFUNCTION ) ;
2018-11-28 04:54:04 +08:00
}
2016-01-05 23:01:17 +08:00
2019-02-05 04:18:47 +08:00
void TextThread : : Stop ( )
2016-01-05 23:01:17 +08:00
{
2019-02-05 04:18:47 +08:00
timer = NULL ;
2018-07-19 04:18:43 +08:00
}
2019-07-02 13:56:04 +08:00
void TextThread : : AddSentence ( std : : wstring sentence )
2019-01-02 04:15:09 +08:00
{
2019-06-05 03:21:04 +08:00
queuedSentences - > emplace_back ( std : : move ( sentence ) ) ;
2019-01-02 04:15:09 +08:00
}
2019-02-10 07:24:54 +08:00
void TextThread : : Push ( BYTE * data , int length )
2018-07-12 08:18:04 +08:00
{
2019-02-09 13:30:38 +08:00
if ( length < 0 ) return ;
2019-01-10 11:35:01 +08:00
std : : scoped_lock lock ( bufferMutex ) ;
2019-01-28 20:25:58 +08:00
BYTE doubleByteChar [ 2 ] ;
2019-02-09 13:30:38 +08:00
if ( length = = 1 ) // doublebyte characters must be processed as pairs
2021-01-15 21:07:23 +08:00
{
if ( leadByte )
{
doubleByteChar [ 0 ] = leadByte ;
doubleByteChar [ 1 ] = data [ 0 ] ;
data = doubleByteChar ;
length = 2 ;
leadByte = 0 ;
}
else if ( IsDBCSLeadByteEx ( hp . codepage ? hp . codepage : Host : : defaultCodepage , data [ 0 ] ) )
{
leadByte = data [ 0 ] ;
length = 0 ;
}
}
2019-01-28 20:25:58 +08:00
2023-04-10 22:33:01 +08:00
if ( flushDelaySpacing & & ! buffer . empty ( ) & & ( hp . type & ( USING_STRING ) ) ) buffer + = L " \x200b " ; // insert \x200b to recognize it in case it has to be found with a filter
2020-01-20 05:23:30 +08:00
if ( hp . type & HEX_DUMP ) for ( int i = 0 ; i < length ; i + = sizeof ( short ) ) buffer . append ( FormatString ( L " %04hX " , * ( short * ) ( data + i ) ) ) ;
else if ( hp . type & USING_UNICODE ) buffer . append ( ( wchar_t * ) data , length / sizeof ( wchar_t ) ) ;
2020-02-28 15:34:34 +08:00
else if ( auto converted = StringToWideString ( std : : string ( ( char * ) data , length ) , hp . codepage ? hp . codepage : Host : : defaultCodepage ) ) buffer . append ( converted . value ( ) ) ;
2018-11-26 05:23:41 +08:00
else Host : : AddConsoleOutput ( INVALID_CODEPAGE ) ;
2019-07-19 06:15:00 +08:00
if ( hp . type & FULL_STRING ) buffer . push_back ( L ' \n ' ) ;
2021-07-02 03:35:33 +08:00
lastPushTime = GetTickCount64 ( ) ;
2019-02-07 08:48:42 +08:00
if ( filterRepetition )
2019-01-02 04:15:09 +08:00
{
2020-01-19 14:25:57 +08:00
if ( std : : all_of ( buffer . begin ( ) , buffer . end ( ) , [ & ] ( wchar_t ch ) { return repeatingChars . find ( ch ) ! = repeatingChars . end ( ) ; } ) ) buffer . clear ( ) ;
2019-06-29 18:13:26 +08:00
if ( RemoveRepetition ( buffer ) ) // sentence repetition detected, which means the entire sentence has already been received
2019-02-07 08:48:42 +08:00
{
repeatingChars = std : : unordered_set ( buffer . begin ( ) , buffer . end ( ) ) ;
2019-02-09 13:30:38 +08:00
AddSentence ( std : : move ( buffer ) ) ;
2019-02-07 08:48:42 +08:00
buffer . clear ( ) ;
}
2019-01-02 04:15:09 +08:00
}
2019-06-03 11:05:01 +08:00
2019-07-19 06:15:00 +08:00
if ( flushDelay = = 0 & & hp . type & FULL_STRING )
2019-06-03 11:05:01 +08:00
{
AddSentence ( std : : move ( buffer ) ) ;
buffer . clear ( ) ;
}
2018-12-29 00:13:02 +08:00
}
2020-04-26 10:34:53 +08:00
void TextThread : : Push ( const wchar_t * data )
{
std : : scoped_lock lock ( bufferMutex ) ;
// not sure if this should filter repetition
2021-07-02 03:35:33 +08:00
lastPushTime = GetTickCount64 ( ) ;
2020-04-26 10:34:53 +08:00
buffer + = data ;
}
2018-12-14 11:44:55 +08:00
void TextThread : : Flush ( )
2018-11-04 09:41:38 +08:00
{
2019-08-20 03:58:53 +08:00
{
auto storage = this - > storage . Acquire ( ) ;
if ( storage - > size ( ) > maxHistorySize ) storage - > erase ( 0 , storage - > size ( ) - maxHistorySize ) ; // https://github.com/Artikash/Textractor/issues/127#issuecomment-486882983
}
2019-04-27 08:55:07 +08:00
2020-03-03 14:38:51 +08:00
std : : vector < std : : wstring > sentences ;
2019-06-05 03:21:04 +08:00
queuedSentences - > swap ( sentences ) ;
2019-09-30 20:45:01 +08:00
int totalSize = 0 ;
2019-06-05 03:21:04 +08:00
for ( auto & sentence : sentences )
2019-03-28 11:35:22 +08:00
{
2019-09-30 20:45:01 +08:00
totalSize + = sentence . size ( ) ;
2021-03-10 12:32:56 +08:00
sentence . erase ( std : : remove ( sentence . begin ( ) , sentence . end ( ) , 0 ) , sentence . end ( ) ) ;
2019-02-05 04:18:47 +08:00
if ( Output ( * this , sentence ) ) storage - > append ( sentence ) ;
2019-03-28 11:35:22 +08:00
}
2018-12-03 04:30:35 +08:00
2019-01-10 11:35:01 +08:00
std : : scoped_lock lock ( bufferMutex ) ;
2019-01-02 04:15:09 +08:00
if ( buffer . empty ( ) ) return ;
2021-07-02 03:35:33 +08:00
if ( buffer . size ( ) > maxBufferSize | | GetTickCount64 ( ) - lastPushTime > flushDelay )
2019-02-05 04:18:47 +08:00
{
2019-02-09 13:30:38 +08:00
AddSentence ( std : : move ( buffer ) ) ;
2019-02-05 04:18:47 +08:00
buffer . clear ( ) ;
}
2018-07-12 08:18:04 +08:00
}