forked from Public-Mirror/Textractor
improve repetition detection
This commit is contained in:
parent
20956f6412
commit
329d56c969
@ -1,7 +1,6 @@
|
|||||||
#include "extensions.h"
|
#include "extensions.h"
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
bool RemoveRepeatedChars(std::wstring& sentence)
|
bool RemoveRepeatedChars(std::wstring& sentence)
|
||||||
{
|
{
|
||||||
@ -18,44 +17,46 @@ bool RemoveRepeatedChars(std::wstring& sentence)
|
|||||||
if ((j - i) % repeatNumber != 0) return false;
|
if ((j - i) % repeatNumber != 0) return false;
|
||||||
else break;
|
else break;
|
||||||
|
|
||||||
// Keeps only every repeatNumber'th character (indices 0, repeatNumber, 2 * repeatNumber, ...) and removes the rest.
|
std::wstring newSentence = L"";
|
||||||
sentence.erase(std::remove_if(sentence.begin(), sentence.end(), [&](const wchar_t& c) {return (&c - &*sentence.begin()) % repeatNumber != 0; }), sentence.end());
|
for (int i = 0; i < sentence.size(); ++i) if (i % repeatNumber == 0) newSentence.push_back(sentence[i]);
|
||||||
|
sentence = newSentence;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Strips leading text that shows up again later in the same sentence.
//
// Finds the longest prefix of the sentence that occurs again at or after its own end.
// If that prefix is at least 5 characters long it is erased, and the remainder is
// examined again until no such prefix is left.
//
// sentence: inspected and, when a repeat is found, shortened in place.
// Returns true if the sentence was modified, false otherwise.
bool RemoveCyclicRepeats(std::wstring& sentence)
{
	// If the first 5 characters appear later on, there's probably a repetition issue.
	const std::size_t minRepeatLength = 5;
	// Prefixes that reach this length are left untouched; this bounds the search cost.
	const std::size_t maxRepeatLength = 1999;

	bool modified = false;
	for (;;)
	{
		std::size_t repeatLength = 0;
		// A prefix of length n can only reappear at or after index n when 2 * n <= size,
		// so the search never looks past the end of the string (an empty sentence exits at once).
		while (repeatLength + 1 <= sentence.size() / 2 &&
			sentence.find(sentence.data(), repeatLength + 1, repeatLength + 1) != std::wstring::npos)
		{
			if (++repeatLength >= maxRepeatLength) return modified;
		}
		if (repeatLength < minRepeatLength) return modified;

		// Drop the repeated prefix and look for further repeats in what is left.
		sentence.erase(0, repeatLength);
		modified = true;
	}
}
|
||||||
|
|
||||||
// Rejects a sentence that has already been seen for the same handle.
//
// sentence: the text to check; it is never modified here.
// handle: paired with the sentence text to form the lookup key.
// Returns false (the sentence is unchanged) the first time a (handle, sentence) pair is seen.
// Throws std::exception when the same pair has been seen before.
bool RemoveRepeatedSentences(std::wstring& sentence, int handle)
{
	static std::set<std::pair<int, std::wstring>> seenSentences;
	static std::mutex m;
	std::lock_guard<std::mutex> l(m);
	// A single emplace both records the pair and reports whether it was already present,
	// instead of a count() followed by a separate insert().
	if (!seenSentences.emplace(handle, sentence).second) throw std::exception();
	return false;
}
|
||||||
|
|
||||||
bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo)
|
bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo)
|
||||||
{
|
{
|
||||||
if (GetProperty("hook address", miscInfo) == -1) return false;
|
if (GetProperty("hook address", miscInfo) == -1) return false;
|
||||||
return RemoveRepeatedChars(sentence) | RemoveCyclicRepeats(sentence) | RemoveRepeatedSentences(sentence, GetProperty("text handle", miscInfo));
|
bool ret = false;
|
||||||
|
ret |= RemoveRepeatedChars(sentence);
|
||||||
|
ret |= RemoveCyclicRepeats(sentence);
|
||||||
|
ret |= RemoveRepeatedSentences(sentence, GetProperty("text handle", miscInfo));
|
||||||
|
return ret;
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user