mirror of
https://github.com/Artikash/Textractor.git
synced 2024-12-23 17:04:12 +08:00
upgrade repetition remover and add tests
This commit is contained in:
parent
bdc083a62a
commit
54c1b508d5
@ -12,6 +12,8 @@ add_library(Google\ Translate SHARED googletranslate.cpp extensionimpl.cpp)
|
||||
add_library(Regex\ Filter SHARED regexfilter.cpp extensionimpl.cpp)
|
||||
add_library(Remove\ Repetition SHARED removerepeat.cpp extensionimpl.cpp)
|
||||
|
||||
add_executable(Extension_Tests extensiontester.cpp)
|
||||
|
||||
target_link_libraries(Bing\ Translate winhttp Qt5::Widgets)
|
||||
target_link_libraries(Extra\ Window Qt5::Widgets)
|
||||
target_link_libraries(Google\ Translate winhttp Qt5::Widgets)
|
||||
|
11
extensions/extensiontester.cpp
Normal file
11
extensions/extensiontester.cpp
Normal file
@ -0,0 +1,11 @@
|
||||
#include "common.h"
|
||||
#include <filesystem>
|
||||
|
||||
int main()
|
||||
{
|
||||
wchar_t path[MAX_PATH] = {};
|
||||
GetModuleFileNameW(NULL, path, MAX_PATH);
|
||||
*(wcsrchr(path, L'\\') + 1) = 0;
|
||||
for (auto file : std::filesystem::directory_iterator(path))
|
||||
if (file.path().extension() == L".dll") LoadLibraryW(file.path().c_str());
|
||||
}
|
@ -1,22 +1,34 @@
|
||||
#include "extension.h"
|
||||
#include "defs.h"
|
||||
|
||||
/**
 * Collapses uniform per-character repetition in place, e.g. L"aabbcc" -> L"abc".
 *
 * The repeat factor is inferred as the most common length among the maximal runs of
 * identical characters in the sentence (the smallest length wins ties). If that factor
 * is 1, or the sentence is empty, the sentence is left untouched. Otherwise each run is
 * shrunk as follows: the leading (runLength % factor) characters are kept verbatim and
 * the remaining multiple-of-factor part is divided by the factor.
 *
 * @param sentence Text to deduplicate; modified in place.
 */
void RemoveRepeatedChars(std::wstring& sentence)
{
	if (sentence.empty()) return;
	const size_t length = sentence.size();

	// runLengthCounts[n] = how many maximal runs of identical characters have length n.
	// Every run is counted exactly once, including the final one (closed when i == length).
	std::vector<int> runLengthCounts(length + 1, 0);
	for (size_t runStart = 0, i = 1; i <= length; ++i)
		if (i == length || sentence[i] != sentence[runStart])
		{
			++runLengthCounts[i - runStart];
			runStart = i;
		}

	// max_element returns the first maximum, so the shortest run length wins ties.
	const size_t repeatNumber = static_cast<size_t>(
		std::distance(runLengthCounts.begin(), std::max_element(runLengthCounts.begin(), runLengthCounts.end())));
	// A factor of 1 means nothing is repeated; the guard also rules out a modulo by zero below.
	if (repeatNumber <= 1) return;

	std::wstring newSentence;
	newSentence.reserve(length);
	for (size_t runStart = 0; runStart < length;)
	{
		size_t runEnd = runStart + 1;
		while (runEnd < length && sentence[runEnd] == sentence[runStart]) ++runEnd;
		const size_t runLength = runEnd - runStart;
		// Keep the non-divisible remainder as-is, collapse the rest by the repeat factor.
		newSentence.append(runLength % repeatNumber + runLength / repeatNumber, sentence[runStart]);
		runStart = runEnd;
	}
	sentence.swap(newSentence);
}
|
||||
|
||||
@ -38,3 +50,22 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
|
||||
RemoveCyclicRepeats(sentence);
|
||||
return true;
|
||||
}
|
||||
|
||||
TEST(
|
||||
{
|
||||
std::wstring repeatedChars = L"aaaaaaaaaaaabbbbbbcccdddaabbbcccddd";
|
||||
RemoveRepeatedChars(repeatedChars);
|
||||
assert(repeatedChars.find(L"aaaabbcd") == 0);
|
||||
|
||||
std::wstring cyclicRepeats = L"abcdeabcdefabcdefgabcdefgabcdefgabcdefgabcdefg";
|
||||
RemoveCyclicRepeats(cyclicRepeats);
|
||||
assert(cyclicRepeats == L"abcdefg");
|
||||
|
||||
InfoForExtension tester{ "hook address", 0, nullptr };
|
||||
std::wstring empty = L"", one = L" ", normal = L"This is a normal sentence. ‚Í‚¢<E2809A>B";
|
||||
ProcessSentence(empty, { &tester });
|
||||
ProcessSentence(one, { &tester });
|
||||
ProcessSentence(normal, { &tester });
|
||||
assert(empty == L"" && one == L" " && normal == L"This is a normal sentence. ‚Í‚¢<E2809A>B");
|
||||
}
|
||||
);
|
||||
|
Loading…
x
Reference in New Issue
Block a user