From 6a6c208c2076541ac6e31b0d44f2973c8e985b29 Mon Sep 17 00:00:00 2001
From: Akash Mozumdar
Date: Mon, 7 Oct 2019 01:00:54 -0400
Subject: [PATCH] repetition filters bail after 30 seconds

---
 extensions/removerepeatphrase.cpp  | 3 ++-
 extensions/removerepeatphrase2.cpp | 5 ++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/extensions/removerepeatphrase.cpp b/extensions/removerepeatphrase.cpp
index f8af823..3785832 100644
--- a/extensions/removerepeatphrase.cpp
+++ b/extensions/removerepeatphrase.cpp
@@ -46,8 +46,9 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
 	// This algorithm looks for repeating substrings (in other words, common prefixes among the set of suffixes) of the sentence with length > 6
 	// It then looks for any regions of characters at least twice as long as the substring made up only of characters in the substring, and erases them
 	// If this results in the substring being completely erased from the string, the substring is copied to the last location where it was located in the original string
+	auto timeout = GetTickCount64() + 30'000; // give up if taking over 30 seconds
 	std::vector suffixArray = GenerateSuffixArray(sentence);
-	for (int i = 0; i + 1 < sentence.size(); ++i)
+	for (int i = 0; i + 1 < sentence.size() && GetTickCount64() < timeout; ++i)
 	{
 		int commonPrefixLength = 0;
 		for (int j = suffixArray[i], k = suffixArray[i + 1]; j < sentence.size() && k < sentence.size(); ++j, ++k)
diff --git a/extensions/removerepeatphrase2.cpp b/extensions/removerepeatphrase2.cpp
index ee06f5e..8065139 100644
--- a/extensions/removerepeatphrase2.cpp
+++ b/extensions/removerepeatphrase2.cpp
@@ -5,13 +5,12 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
 	if (sentenceInfo["text number"] == 0) return false;
 
 	// This algorithm looks at all the prefixes of the sentence: if a prefix is found later in the sentence, it is removed from the beginning and the process is repeated
-	// Complexity O(N^3) so executing for N > 10,000 dangerous
-	if (sentence.size() > 10000) return false;
+	auto timeout = GetTickCount64() + 30'000; // give up if taking over 30 seconds
 	auto data = std::make_unique(sentence.size() + 1);
 	wcscpy_s(data.get(), sentence.size() + 1, sentence.c_str());
 	wchar_t* dataEnd = data.get() + sentence.size();
 	int skip = 0, count = 0;
-	for (wchar_t* end = dataEnd; end - data.get() > skip; --end)
+	for (wchar_t* end = dataEnd; end - data.get() > skip && GetTickCount64() < timeout; --end)
 	{
 		std::swap(*end, *dataEnd);
 		int junkLength = end - data.get() - skip;