From 40e62a13d638c4cda18cb4aa5fe0d281c8b129b4 Mon Sep 17 00:00:00 2001 From: Akash Mozumdar Date: Wed, 20 Feb 2019 22:12:26 -0500 Subject: [PATCH] upgrade repetition remover algorithm --- extensions/removerepeat.cpp | 28 +++++++++++++++++++++------- vnrhook/engine/engine.cc | 2 +- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/extensions/removerepeat.cpp b/extensions/removerepeat.cpp index dfa8cbc..2ccb184 100644 --- a/extensions/removerepeat.cpp +++ b/extensions/removerepeat.cpp @@ -6,11 +6,11 @@ void RemoveRepeatedChars(std::wstring& sentence) int repeatNumber = 1; wchar_t prevChar = L'\0'; for (auto nextChar : sentence) - if (nextChar == prevChar) repeatNumber++; + if (nextChar == prevChar) repeatNumber += 1; else { prevChar = nextChar; - ++repeatNumbers.at(repeatNumber); + repeatNumbers.at(repeatNumber) += 1; repeatNumber = 1; } if ((repeatNumber = std::distance(repeatNumbers.begin(), std::max_element(repeatNumbers.begin(), repeatNumbers.end()))) == 1) return; @@ -33,13 +33,24 @@ void RemoveRepeatedChars(std::wstring& sentence) void RemoveCyclicRepeats(std::wstring& sentence) { -remove: - for (std::wstring junk = sentence; junk.size() > 4; junk.pop_back()) - if (sentence.rfind(junk) > 0) + auto data = std::make_unique<wchar_t[]>(sentence.size() + 1); + wcscpy_s(data.get(), sentence.size() + 1, sentence.c_str()); + wchar_t* dataEnd = data.get() + sentence.size(); + int skip = 0, count = 0; + for (wchar_t* end = dataEnd; end - data.get() > skip; --end) + { + std::swap(*end, *dataEnd); + int junkLength = end - data.get() - skip; + auto junkFound = wcsstr(sentence.c_str() + skip + junkLength, data.get() + skip); + std::swap(*end, *dataEnd); + if (junkFound) { - sentence.erase(0, junk.size()); - goto remove; + skip += junkLength; + count += 1; + end = dataEnd; } + } + if (count && skip / count >= 3) sentence = data.get() + skip; } bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo) @@ -57,8 +68,11 @@ TEST( 
assert(repeatedChars.find(L"aaaabbcd") == 0); std::wstring cyclicRepeats = L"abcdeabcdefabcdefgabcdefgabcdefgabcdefgabcdefg"; + std::wstring buildupRepeats = L"aababcabcdabcdeabcdefabcdefg"; RemoveCyclicRepeats(cyclicRepeats); + RemoveCyclicRepeats(buildupRepeats); assert(cyclicRepeats == L"abcdefg"); + assert(buildupRepeats == L"abcdefg"); std::wstring empty = L"", one = L" ", normal = L"This is a normal sentence. はい"; ProcessSentence(empty, { SentenceInfo::DUMMY }); diff --git a/vnrhook/engine/engine.cc b/vnrhook/engine/engine.cc index f9f5fac..ed3ec10 100644 --- a/vnrhook/engine/engine.cc +++ b/vnrhook/engine/engine.cc @@ -10237,7 +10237,7 @@ void SpecialHookV8String(DWORD dwDatabase, HookParam* hp, BYTE, DWORD* data, DWO DWORD strPtr = *(DWORD*)ecx; *data = strPtr + 0xb; *len = *(short*)(strPtr + 7); - if (*len < 12) *split = 1; // To ensure this is caught by cyclic repetition detection, split if there's 6+ wide chars + //if (*len < 12) *split = 1; // To ensure this is caught by cyclic repetition detection, split if there's 6+ wide chars //*split = *(DWORD*)((BYTE*)hp->split + dwDatabase); }