forked from Public-Mirror/Textractor
Clean up repetition remover. Don't need to remove repeated sentences since that detection was added in textthread.
This commit is contained in:
parent
219804cecc
commit
ddc8313d37
@ -1,61 +1,34 @@
|
||||
#include "../extension.h"
|
||||
|
||||
// RemoveRepeatedChars: collapses a sentence whose characters each appear several times in a
// row, keeping one character out of every group of `repeatNumber`.
// NOTE(review): this listing appears to be a rendered diff without +/- markers, so both the
// `bool` and the `void` signature, and both variants of some statements, show up back to back.
bool RemoveRepeatedChars(std::wstring& sentence)
|
||||
void RemoveRepeatedChars(std::wstring& sentence)
|
||||
{
|
||||
// Length of the run of identical characters at the very start of the sentence.
int repeatNumber = 0;
|
||||
wchar_t prevChar = sentence[0];
|
||||
for (auto c : sentence)
|
||||
if (c == prevChar) repeatNumber++;
|
||||
else break;
|
||||
// A leading run of length 1 means the first character is not repeated: leave the sentence as is.
if (repeatNumber == 1) return false;
|
||||
if (repeatNumber == 1) return;
|
||||
|
||||
// Step through the sentence in strides of repeatNumber. From each start i, look for the first
// position j holding a different character; give up (sentence untouched) if that change does
// not fall on a multiple of repeatNumber.
for (int i = 0; i < sentence.size(); i += repeatNumber)
|
||||
for (int j = i; j < sentence.size(); ++j)
|
||||
if (sentence[j] != sentence[i])
|
||||
if ((j - i) % repeatNumber != 0) return false;
|
||||
if ((j - i) % repeatNumber != 0) return;
|
||||
// This `else` pairs with the modulo check directly above: the change is aligned, so stop scanning from this i.
else break;
|
||||
|
||||
// Rebuild the sentence from the characters at indices that are multiples of repeatNumber.
std::wstring newSentence = L"";
|
||||
for (int i = 0; i < sentence.size(); ++i) if (i % repeatNumber == 0) newSentence.push_back(sentence[i]);
|
||||
for (int i = 0; i < sentence.size(); i += repeatNumber) newSentence.push_back(sentence[i]);
|
||||
sentence = newSentence;
|
||||
return true;
|
||||
}
|
||||
|
||||
// RemoveCyclicRepeats: strips a leading chunk of the sentence when that same chunk occurs
// again further on in the sentence.
// NOTE(review): this listing appears to be a rendered diff without +/- markers. It seems to
// contain the older wcsstr-based body, the RemoveRepeatedSentences helper, and the single
// regex-based statement near the end, all inside one span — confirm against the repository.
bool RemoveCyclicRepeats(std::wstring& sentence)
|
||||
void RemoveCyclicRepeats(std::wstring& sentence)
|
||||
{
|
||||
// Skip() is not defined in this listing; presumably it comes from extension.h — TODO confirm.
if (sentence == L"") Skip();
|
||||
int junkLength = 0;
|
||||
// Zero-initialised buffer holding the first junkLength characters of the sentence.
wchar_t junk[2000] = {};
|
||||
// Extend `junk` one character at a time for as long as it can still be found in the
// sentence when searching from offset junkLength onwards.
while (wcsstr(sentence.c_str() + junkLength, junk))
|
||||
{
|
||||
junk[junkLength] = sentence[junkLength];
|
||||
// Stop once the prefix would reach the size of the junk buffer.
if (++junkLength >= 2000) return false;
|
||||
}
|
||||
if (--junkLength >= 5) // If the first 5 characters appear later on, there's probably a repetition issue.
|
||||
{
|
||||
// Drop the first junkLength characters, then run again on what is left.
sentence = std::wstring(sentence.c_str() + junkLength);
|
||||
RemoveCyclicRepeats(sentence);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// RemoveRepeatedSentences: remembers, per handle, every sentence passed in. It calls Skip()
// when the sentence was already recorded for that handle; otherwise it records the sentence
// and returns false. The function-local static map is accessed under a function-local static mutex.
bool RemoveRepeatedSentences(std::wstring& sentence, int64_t handle)
|
||||
{
|
||||
static std::unordered_map<int64_t, std::unordered_set<std::wstring>> seenSentences;
|
||||
static std::mutex m;
|
||||
std::lock_guard<std::mutex> l(m);
|
||||
if (seenSentences[handle].count(sentence)) Skip();
|
||||
seenSentences[handle].insert(sentence);
|
||||
return false;
|
||||
// While the sentence starts with a group of at least 5 non-NUL characters that occurs again
// later on, erase that group from the front. The std::wregex is constructed inside the loop
// condition, i.e. on every evaluation of it.
for (std::wsmatch results; std::regex_search(sentence, results, std::wregex(L"^([^\\x00]{5,})[^\\x00]*?\\1")); sentence.erase(0, results[1].length()));
|
||||
}
|
||||
|
||||
// ProcessSentence: runs the repetition removers defined above over the sentence.
// NOTE(review): this listing appears to be a rendered diff without +/- markers, so two bodies
// show up in sequence: one that ORs together the helpers' bool results, and one that calls
// the two remaining helpers and returns true.
// NOTE(review): the meaning of the return value is not visible here; presumably it tells the
// host whether the sentence was modified — confirm against extension.h.
bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
|
||||
{
|
||||
// Sentences whose "hook address" entry equals -1 are left untouched.
if (sentenceInfo["hook address"] == -1) return false;
|
||||
bool ret = false;
|
||||
ret |= RemoveRepeatedChars(sentence);
|
||||
ret |= RemoveCyclicRepeats(sentence);
|
||||
ret |= RemoveRepeatedSentences(sentence, sentenceInfo["text handle"]);
|
||||
return ret;
|
||||
}
|
||||
RemoveRepeatedChars(sentence);
|
||||
RemoveCyclicRepeats(sentence);
|
||||
return true;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user