mirror of
https://github.com/Artikash/Textractor.git
synced 2025-01-10 09:43:55 +08:00
Optimize repetition removal at the cost of some correctness (can be dealt with by extensions)
This commit is contained in:
parent
0492cb9549
commit
59869dc45a
@ -50,14 +50,10 @@ void TextThread::Flush()
|
|||||||
{
|
{
|
||||||
std::wstring sentence = buffer;
|
std::wstring sentence = buffer;
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
|
repeatingChars.clear();
|
||||||
|
|
||||||
locker.unlock(); // This algorithm might take a while
|
for (std::wsmatch results; std::regex_search(sentence, results, std::wregex(L"^([^]{6,})\\1\\1")); sentence = results[1])
|
||||||
std::unordered_set<wchar_t> repeatingChars;
|
|
||||||
for (std::wsmatch results; std::regex_search(sentence, results, std::wregex(L"([^\\x00]{6,})\\1\\1")); sentence = results[1])
|
|
||||||
repeatingChars = std::unordered_set(sentence.begin(), sentence.end());
|
repeatingChars = std::unordered_set(sentence.begin(), sentence.end());
|
||||||
locker.lock();
|
|
||||||
|
|
||||||
this->repeatingChars = repeatingChars;
|
|
||||||
|
|
||||||
locker.unlock();
|
locker.unlock();
|
||||||
AddSentence(sentence);
|
AddSentence(sentence);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user