From d31748c1058398a9e30874b27be1e857f331f066 Mon Sep 17 00:00:00 2001 From: Akash Mozumdar Date: Sat, 1 Sep 2018 04:24:22 -0400 Subject: [PATCH] refactor all extensions. move repetition detection to extension --- GUI/host/textthread.cc | 2 - extensions/copyclipboard.cpp | 32 +++------- extensions/extensions.h | 30 +++++++++ extensions/extranewlines.cpp | 21 ++---- extensions/googletranslate.cpp | 113 ++++++++++++++------------------- extensions/removerepeat.cpp | 86 ++++++++++++++----------- 6 files changed, 143 insertions(+), 141 deletions(-) diff --git a/GUI/host/textthread.cc b/GUI/host/textthread.cc index b53d7f9..efcf729 100644 --- a/GUI/host/textthread.cc +++ b/GUI/host/textthread.cc @@ -54,8 +54,6 @@ void TextThread::AddSentence(std::wstring sentence) void TextThread::AddText(const BYTE *con, int len) { LOCK(ttMutex); - // Artikash 8/27/2018: add repetition filter - if (len > 6 && buffer.data() && (strstr(buffer.data(), (const char*)con) || wcsstr((const wchar_t*)buffer.data(), (const wchar_t*)con))) return; buffer.insert(buffer.end(), con, con + len); timestamp = GetTickCount(); } diff --git a/extensions/copyclipboard.cpp b/extensions/copyclipboard.cpp index 53b639d..1104c8a 100644 --- a/extensions/copyclipboard.cpp +++ b/extensions/copyclipboard.cpp @@ -1,28 +1,16 @@ #include "extensions.h" -extern "C" +bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo) { - /** - * Param sentence: pointer to sentence received by NextHooker (UTF-16). - * You should not modify this sentence. If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that. - * Param miscInfo: pointer to start of singly linked list containing misc info about the sentence. - * Return value: pointer to sentence NextHooker takes for future processing and display. - * Return 'sentence' unless you created a new sentence/buffer as mentioned above. 
- * NextHooker will display the sentence after all extensions have had a chance to process and/or modify it. - * THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE! - */ - __declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentence, const InfoForExtension* miscInfo) + if (GetProperty("current select", miscInfo) && GetProperty("hook address", miscInfo) != -1) { - if (GetProperty("current select", miscInfo) && GetProperty("hook address", miscInfo) != -1) - { - HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, (wcslen(sentence) + 1) * sizeof(wchar_t)); - memcpy(GlobalLock(hMem), sentence, (wcslen(sentence) + 1) * sizeof(wchar_t)); - GlobalUnlock(hMem); - OpenClipboard(0); - EmptyClipboard(); - SetClipboardData(CF_UNICODETEXT, hMem); - CloseClipboard(); - } - return sentence; + HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, (sentence.size() + 1) * sizeof(wchar_t)); + memcpy(GlobalLock(hMem), sentence.c_str(), (sentence.size() + 1) * sizeof(wchar_t)); /* copies the text and its terminating null, nothing past it */ + GlobalUnlock(hMem); + OpenClipboard(0); + EmptyClipboard(); + SetClipboardData(CF_UNICODETEXT, hMem); + CloseClipboard(); } + return false; } \ No newline at end of file diff --git a/extensions/extensions.h b/extensions/extensions.h index 49868c9..a911018 100644 --- a/extensions/extensions.h +++ b/extensions/extensions.h @@ -20,4 +20,34 @@ int GetProperty(const char* propertyName, const InfoForExtension* miscInfo) else miscInfoTraverser = miscInfoTraverser->nextProperty; return 0; +} + +/** + * Param sentence: sentence received by NextHooker (UTF-16). + * Param miscInfo: pointer to start of singly linked list containing misc info about the sentence. + * Return value: whether the sentence was modified. + * NextHooker will display the sentence after all extensions have had a chance to process and/or modify it. + * THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE! 
+*/ +bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo); + +/** + * Param sentence: pointer to sentence received by NextHooker (UTF-16). + * You should not modify this sentence. If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that. + * Param miscInfo: pointer to start of singly linked list containing misc info about the sentence. + * Return value: pointer to sentence NextHooker takes for future processing and display. + * Return 'sentence' unless you created a new sentence/buffer as mentioned above. + * NextHooker will display the sentence after all extensions have had a chance to process and/or modify it. + * THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE! +*/ +extern "C" __declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentenceArr, const InfoForExtension* miscInfo) +{ + std::wstring sentence(sentenceArr); + if (ProcessSentence(sentence, miscInfo)) + { + wchar_t* newSentence = (wchar_t*)malloc((sentence.size() + 1) * sizeof(wchar_t)); /* element size, not pointer size */ + wcscpy(newSentence, sentence.c_str()); + return newSentence; + } + else return sentenceArr; } \ No newline at end of file diff --git a/extensions/extranewlines.cpp b/extensions/extranewlines.cpp index 605b244..f8ac364 100644 --- a/extensions/extranewlines.cpp +++ b/extensions/extranewlines.cpp @@ -1,21 +1,8 @@ #include "extensions.h" -extern "C" +bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo) { - /** - * Param sentence: pointer to sentence received by NextHooker (UTF-16). - * You should not modify this sentence. If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that. - * Param miscInfo: pointer to start of singly linked list containing misc info about the sentence. - * Return value: pointer to sentence NextHooker takes for future processing and display. 
- * Return 'sentence' unless you created a new sentence/buffer as mentioned above. - * NextHooker will display the sentence after all extensions have had a chance to process and/or modify it. - * THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE! - */ - __declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentence, const InfoForExtension* miscInfo) - { - if (GetProperty("hook address", miscInfo) == -1) return sentence; - wchar_t* newSentence = (wchar_t*)malloc((wcslen(sentence) + 6) * sizeof(wchar_t)); - swprintf(newSentence, wcslen(sentence) + 6, L"%s\r\n", sentence); - return newSentence; - } + if (GetProperty("hook address", miscInfo) == -1) return false; + sentence += L"\r\n"; + return true; } \ No newline at end of file diff --git a/extensions/googletranslate.cpp b/extensions/googletranslate.cpp index 5f45f64..6a2a316 100644 --- a/extensions/googletranslate.cpp +++ b/extensions/googletranslate.cpp @@ -30,84 +30,69 @@ std::wstring GetTranslationUri(const wchar_t* text, unsigned int TKK) return std::wstring(L"/translate_a/single?client=t&dt=ld&dt=rm&dt=t&tk=") + std::to_wstring(a) + L"." + std::to_wstring(b) + L"&q=" + std::wstring(text); } -extern "C" +bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo) { - /** - * Param sentence: pointer to sentence received by NextHooker (UTF-16). - * You should not modify this sentence. If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that. - * Param miscInfo: pointer to start of singly linked list containing misc info about the sentence. - * Return value: pointer to sentence NextHooker takes for future processing and display. - * Return 'sentence' unless you created a new sentence/buffer as mentioned above. - * NextHooker will display the sentence after all extensions have had a chance to process and/or modify it. 
- * THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE! - */ - __declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentence, const InfoForExtension* miscInfo) + static HINTERNET internet = NULL; + if (!internet) internet = WinHttpOpen(L"Mozilla/5.0 NextHooker", WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, NULL, NULL, 0); + static unsigned int TKK = 0; + + std::wstring translation(L""); + + if (GetProperty("hook address", miscInfo) == -1) return false; + + if (internet) { - static HINTERNET internet = NULL; - if (!internet) internet = WinHttpOpen(L"Mozilla/5.0 NextHooker", WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, NULL, NULL, 0); - static unsigned int TKK = 0; - - wchar_t error[] = L"Error while translating."; - std::wstring translation(L""); - const wchar_t* message = error; - - if (wcslen(sentence) > 2000 || GetProperty("hook address", miscInfo) == -1) return sentence; - - if (internet) - { - if (!TKK) - if (HINTERNET connection = WinHttpConnect(internet, L"translate.google.com", INTERNET_DEFAULT_HTTPS_PORT, 0)) - { - if (HINTERNET request = WinHttpOpenRequest(connection, L"GET", L"/", NULL, NULL, NULL, WINHTTP_FLAG_SECURE)) - { - if (WinHttpSendRequest(request, NULL, 0, NULL, 0, 0, NULL)) - { - DWORD bytesRead; - char buffer[100000] = {}; // Google Translate page is ~64kb - WinHttpReceiveResponse(request, NULL); - WinHttpReadData(request, buffer, 100000, &bytesRead); - TKK = strtoll(strstr(buffer, "a\\x3d") + 5, nullptr, 10) + strtoll(strstr(buffer, "b\\x3d") + 5, nullptr, 10); - } - WinHttpCloseHandle(request); - } - WinHttpCloseHandle(connection); - } - + if (!TKK) if (HINTERNET connection = WinHttpConnect(internet, L"translate.google.com", INTERNET_DEFAULT_HTTPS_PORT, 0)) { - if (HINTERNET request = WinHttpOpenRequest(connection, L"GET", GetTranslationUri(sentence, TKK).c_str(), NULL, NULL, NULL, WINHTTP_FLAG_ESCAPE_DISABLE | WINHTTP_FLAG_SECURE)) + if (HINTERNET request = WinHttpOpenRequest(connection, L"GET", L"/", NULL, 
NULL, NULL, WINHTTP_FLAG_SECURE)) { if (WinHttpSendRequest(request, NULL, 0, NULL, 0, 0, NULL)) { DWORD bytesRead; - char buffer[10000] = {}; + char buffer[100000] = {}; // Google Translate page is ~64kb WinHttpReceiveResponse(request, NULL); - WinHttpReadData(request, buffer, 10000, &bytesRead); - // Response formatted as JSON: starts with '[[["' - if (buffer[0] == '[') - { - wchar_t wbuffer[10000] = {}; - MultiByteToWideChar(CP_UTF8, 0, (char*)buffer, -1, wbuffer, 10000); - std::wstring response(wbuffer); - std::wregex translationFinder(L"\\[\"(.*?)\",[n\"]"); - std::wsmatch results; - while (std::regex_search(response, results, translationFinder)) - { - translation += std::wstring(results[1]) + L" "; - response = results.suffix().str(); - } - for (auto& c : translation) if (c == L'\\') c = 0x200b; - message = translation.c_str(); - } + WinHttpReadData(request, buffer, 100000, &bytesRead); + TKK = strtoll(strstr(buffer, "a\\x3d") + 5, nullptr, 10) + strtoll(strstr(buffer, "b\\x3d") + 5, nullptr, 10); } WinHttpCloseHandle(request); } WinHttpCloseHandle(connection); } - } - wchar_t* newSentence = (wchar_t*)malloc((wcslen(sentence) + 3 + wcslen(message)) * sizeof(wchar_t)); - swprintf(newSentence, wcslen(sentence) + 3 + wcslen(message), L"%s%s%s", sentence, L"\r\n", message); - return newSentence; + if (HINTERNET connection = WinHttpConnect(internet, L"translate.google.com", INTERNET_DEFAULT_HTTPS_PORT, 0)) + { + if (HINTERNET request = WinHttpOpenRequest(connection, L"GET", GetTranslationUri(sentence.c_str(), TKK).c_str(), NULL, NULL, NULL, WINHTTP_FLAG_ESCAPE_DISABLE | WINHTTP_FLAG_SECURE)) + { + if (WinHttpSendRequest(request, NULL, 0, NULL, 0, 0, NULL)) + { + DWORD bytesRead; + char buffer[10000] = {}; + WinHttpReceiveResponse(request, NULL); + WinHttpReadData(request, buffer, 10000, &bytesRead); + // Response formatted as JSON: starts with '[[["' + if (buffer[0] == '[') + { + wchar_t wbuffer[10000] = {}; + MultiByteToWideChar(CP_UTF8, 0, (char*)buffer, -1, 
wbuffer, 10000); + std::wstring response(wbuffer); + std::wregex translationFinder(L"\\[\"(.*?)\",[n\"]"); + std::wsmatch results; + while (std::regex_search(response, results, translationFinder)) + { + translation += std::wstring(results[1]) + L" "; + response = results.suffix().str(); + } + for (auto& c : translation) if (c == L'\\') c = 0x200b; + } + } + WinHttpCloseHandle(request); + } + WinHttpCloseHandle(connection); + } } + + if (translation == L"") translation = L"Error while translating."; + sentence += L"\r\n" + translation; + return true; } \ No newline at end of file diff --git a/extensions/removerepeat.cpp b/extensions/removerepeat.cpp index 3b3b7ed..1e1d2ec 100644 --- a/extensions/removerepeat.cpp +++ b/extensions/removerepeat.cpp @@ -1,46 +1,60 @@ #include "extensions.h" +#include <set> +#include <mutex> #include <algorithm> -#include <cwctype> -std::wstring remove_side_spaces(const std::wstring& str) +bool RemoveRepeatedChars(std::wstring& sentence) { - auto begin = std::find_if_not(str.begin(), str.end(), std::iswspace); - if (begin == str.end()) return L""; - auto end = std::find_if_not(str.rbegin(), str.rend(), std::iswspace); - return std::wstring(begin, end.base()); + unsigned int repeatNumber = 0; + wchar_t prevChar = sentence[0]; + for (auto i : sentence) + if (i == prevChar) repeatNumber++; + else break; + if (repeatNumber == 1) return false; + + for (int i = 0; i < sentence.size(); i += repeatNumber) + for (int j = i; j < sentence.size(); ++j) + if (sentence[j] != sentence[i]) + if ((j - i) % repeatNumber != 0) return false; + else break; + + // Keeps every repeatNumber'th character and removes the rest. + sentence.erase(std::remove_if(sentence.begin(), sentence.end(), [&](const wchar_t& c) {return (&c - &*sentence.begin()) % repeatNumber != 0; }), sentence.end()); + return true; } -extern "C" +bool RemoveCyclicRepeats(std::wstring& sentence) { - /** - * Param sentence: pointer to sentence received by NextHooker (UTF-16). - * You should not modify this sentence. 
If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that. - * Param miscInfo: pointer to start of singly linked list containing misc info about the sentence. - * Return value: pointer to sentence NextHooker takes for future processing and display. - * Return 'sentence' unless you created a new sentence/buffer as mentioned above. - * NextHooker will display the sentence after all extensions have had a chance to process and/or modify it. - * THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE! - */ - __declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentence, const InfoForExtension* miscInfo) + unsigned int realLength = 6; // If the first 6 characters appear later on, there's probably a repetition issue. + if (sentence.size() < realLength) return false; + wchar_t realSentence[2000] = {}; + memcpy(realSentence, sentence.c_str(), realLength * sizeof(wchar_t)); + while (wcsstr(sentence.c_str() + realLength, realSentence)) { - std::wstring sentenceStr = remove_side_spaces(std::wstring(sentence)); - unsigned long repeatNumber = 0; - wchar_t prevChar = sentenceStr[0]; - for (auto i : sentenceStr) - if (i == prevChar) repeatNumber++; - else break; - - for (int i = 0; i < sentenceStr.size(); i += repeatNumber) - for (int j = i; j < sentenceStr.size(); ++j) - if (sentenceStr[j] != sentenceStr[i]) - if ((j - i) % repeatNumber != 0) return sentence; - else break; - - if (repeatNumber == 1) return sentence; - sentenceStr.erase(std::remove_if(sentenceStr.begin(), sentenceStr.end(), [&](const wchar_t& c) {return (&c - &*sentenceStr.begin()) % repeatNumber != 0; }), sentenceStr.end()); - - wchar_t* newSentence = (wchar_t*)malloc((sentenceStr.size() + 2) * sizeof(wchar_t)); - wcscpy(newSentence, sentenceStr.c_str()); - return newSentence; + realSentence[realLength] = sentence[realLength]; + if (++realLength >= 2000) return false; } + if (realLength > 7) + { + sentence = 
std::wstring(realSentence); + RemoveCyclicRepeats(sentence); + return true; + } + return false; +} + +bool RemoveRepeatedSentences(std::wstring& sentence, int threadHandle) +{ + static std::set<std::pair<int, std::wstring>> seenSentences; + static std::mutex m; + std::lock_guard<std::mutex> l(m); + if (seenSentences.count({ threadHandle, sentence }) != 0) throw std::exception(); + seenSentences.insert({ threadHandle, sentence }); + return false; +} + +bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo) +{ + if (GetProperty("hook address", miscInfo) == -1) return false; + return RemoveRepeatedChars(sentence) | RemoveCyclicRepeats(sentence) | RemoveRepeatedSentences(sentence, GetProperty("thread handle", miscInfo)); } \ No newline at end of file