From b28b68d2187e7d8b3e88d0fc6380ce16a0599042 Mon Sep 17 00:00:00 2001
From: Akash Mozumdar
Date: Wed, 30 Jun 2021 20:58:40 -0600
Subject: [PATCH] upgrade character deduplication algorithm

---
 extensions/removerepeatchar.cpp     | 26 ++++++++++++--------------
 extensions/removerepeatsentence.cpp |  2 +-
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/extensions/removerepeatchar.cpp b/extensions/removerepeatchar.cpp
index 6da1f2c..cd8a3e0 100644
--- a/extensions/removerepeatchar.cpp
+++ b/extensions/removerepeatchar.cpp
@@ -5,30 +5,25 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
 	if (sentenceInfo["text number"] == 0) return false;
 
 	std::vector repeatNumbers(sentence.size() + 1, 0);
-	int repeatNumber = 1;
-	wchar_t prevChar = 0;
-	for (auto nextChar : sentence)
+	for (int i = 0; i < sentence.size(); ++i)
 	{
-		if (nextChar == prevChar)
+		if (sentence[i] != sentence[i + 1])
 		{
-			repeatNumber += 1;
-		}
-		else
-		{
-			prevChar = nextChar;
-			repeatNumbers.at(repeatNumber) += 1;
-			repeatNumber = 1;
+			int j = i;
+			while (sentence[j] == sentence[i] && --j >= 0);
+			repeatNumbers[i - j] += 1;
 		}
 	}
-	if ((repeatNumber = std::distance(repeatNumbers.begin(), std::max_element(repeatNumbers.begin(), repeatNumbers.end()))) == 1) return false;
+	int repeatNumber = std::distance(repeatNumbers.begin(), std::max_element(repeatNumbers.rbegin(), repeatNumbers.rend()).base() - 1);
+	if (repeatNumber < 2) return false;
 
 	std::wstring newSentence;
 	for (int i = 0; i < sentence.size();)
 	{
-		newSentence.push_back(sentence.at(i));
+		newSentence.push_back(sentence[i]);
 		for (int j = i; j <= sentence.size(); ++j)
 		{
-			if (j == sentence.size() || sentence.at(i) != sentence.at(j))
+			if (j == sentence.size() || sentence[i] != sentence[j])
 			{
 				i += (j - i) % repeatNumber == 0 ? repeatNumber : 1;
 				break;
@@ -44,8 +39,11 @@ TEST(
 		InfoForExtension nonConsole[] = { { "text number", 1 }, {} };
 
 		std::wstring repeatedChars = L"aaaaaaaaaaaabbbbbbcccdddaabbbcccddd";
+		std::wstring someRepeatedChars = L"abcdefaabbccddeeff";
 		ProcessSentence(repeatedChars, { nonConsole });
+		ProcessSentence(someRepeatedChars, { nonConsole });
 		assert(repeatedChars.find(L"aaaabbcd") == 0);
+		assert(someRepeatedChars == L"abcdefabcdef");
 
 		std::wstring empty = L"", one = L" ", normal = L"This is a normal sentence. はい";
 		ProcessSentence(empty, { nonConsole });
diff --git a/extensions/removerepeatsentence.cpp b/extensions/removerepeatsentence.cpp
index b07cc4d..a3ab346 100644
--- a/extensions/removerepeatsentence.cpp
+++ b/extensions/removerepeatsentence.cpp
@@ -30,7 +30,7 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
 	static std::mutex m;
 	m.lock();
 	if (textNumber + 1 > cache.size()) cache.resize(textNumber + 1);
-	auto prevSentences = cache.at(textNumber).Acquire();
+	auto prevSentences = cache[textNumber].Acquire();
 	m.unlock();
 	auto& inserted = prevSentences->emplace_back(sentence);
 	auto firstLocation = std::find(prevSentences->begin(), prevSentences->end(), sentence);