forked from Public-Mirror/Textractor
improve repetition remover perf
This commit is contained in:
parent
1d17902c64
commit
d9a3d5cefd
@ -22,7 +22,17 @@ void RemoveRepeatedChars(std::wstring& sentence)
|
||||
|
||||
// Strips cyclic repetition from the front of `sentence`, in place.
//
// Repeatedly looks for the longest proper prefix (at least 4 characters long)
// that occurs again starting at a later position in the sentence, and erases
// that prefix. The process restarts on the shortened sentence and stops once
// no such prefix remains. Sentences of 4 characters or fewer are left untouched.
//
// This replaces the earlier std::regex based loop (which recompiled a
// backreference pattern on every iteration) with plain substring searches.
//
// NOTE(review): the later occurrence is allowed to overlap the prefix itself
// (e.g. a run of one repeated character collapses to a single character).
// This matches the previous rfind-based behaviour; confirm it is intended.
void RemoveCyclicRepeats(std::wstring& sentence)
{
	// Shortest prefix length that is still tried as a repeat candidate.
	constexpr std::wstring::size_type minRepeatLength = 4;

	for (bool erased = true; erased;)
	{
		erased = false;

		// Try candidate prefixes from longest (size - 1) down to minRepeatLength.
		std::wstring::size_type length = sentence.size();
		while (length > minRepeatLength)
		{
			--length;

			// Search for the prefix [0, length) starting at index 1, i.e. any
			// occurrence other than the prefix itself. The pointer + count
			// overload avoids copying the prefix into a temporary string.
			if (sentence.find(sentence.data(), 1, length) != std::wstring::npos)
			{
				sentence.erase(0, length);
				erased = true;
				break; // Rescan the shortened sentence from its longest prefix.
			}
		}
	}
}
|
||||
|
||||
bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
|
||||
|
Loading…
x
Reference in New Issue
Block a user