mirror of
https://github.com/Artikash/Textractor.git
synced 2025-01-10 09:43:55 +08:00
refactor all extensions. move repetition detection to extension
This commit is contained in:
parent
ca749691f8
commit
d31748c105
@ -54,8 +54,6 @@ void TextThread::AddSentence(std::wstring sentence)
|
|||||||
void TextThread::AddText(const BYTE *con, int len)
|
void TextThread::AddText(const BYTE *con, int len)
|
||||||
{
|
{
|
||||||
LOCK(ttMutex);
|
LOCK(ttMutex);
|
||||||
// Artikash 8/27/2018: add repetition filter
|
|
||||||
if (len > 6 && buffer.data() && (strstr(buffer.data(), (const char*)con) || wcsstr((const wchar_t*)buffer.data(), (const wchar_t*)con))) return;
|
|
||||||
buffer.insert(buffer.end(), con, con + len);
|
buffer.insert(buffer.end(), con, con + len);
|
||||||
timestamp = GetTickCount();
|
timestamp = GetTickCount();
|
||||||
}
|
}
|
||||||
|
@ -1,28 +1,16 @@
|
|||||||
#include "extensions.h"
|
#include "extensions.h"
|
||||||
|
|
||||||
extern "C"
|
bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo)
|
||||||
{
|
{
|
||||||
/**
|
if (GetProperty("current select", miscInfo) && GetProperty("hook address", miscInfo) != -1)
|
||||||
* Param sentence: pointer to sentence received by NextHooker (UTF-16).
|
|
||||||
* You should not modify this sentence. If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that.
|
|
||||||
* Param miscInfo: pointer to start of singly linked list containing misc info about the sentence.
|
|
||||||
* Return value: pointer to sentence NextHooker takes for future processing and display.
|
|
||||||
* Return 'sentence' unless you created a new sentence/buffer as mentioned above.
|
|
||||||
* NextHooker will display the sentence after all extensions have had a chance to process and/or modify it.
|
|
||||||
* THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE!
|
|
||||||
*/
|
|
||||||
__declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentence, const InfoForExtension* miscInfo)
|
|
||||||
{
|
{
|
||||||
if (GetProperty("current select", miscInfo) && GetProperty("hook address", miscInfo) != -1)
|
HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, (sentence.size() + 2) * sizeof(wchar_t));
|
||||||
{
|
memcpy(GlobalLock(hMem), sentence.c_str(), (sentence.size() + 2) * sizeof(wchar_t));
|
||||||
HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, (wcslen(sentence) + 1) * sizeof(wchar_t));
|
GlobalUnlock(hMem);
|
||||||
memcpy(GlobalLock(hMem), sentence, (wcslen(sentence) + 1) * sizeof(wchar_t));
|
OpenClipboard(0);
|
||||||
GlobalUnlock(hMem);
|
EmptyClipboard();
|
||||||
OpenClipboard(0);
|
SetClipboardData(CF_UNICODETEXT, hMem);
|
||||||
EmptyClipboard();
|
CloseClipboard();
|
||||||
SetClipboardData(CF_UNICODETEXT, hMem);
|
|
||||||
CloseClipboard();
|
|
||||||
}
|
|
||||||
return sentence;
|
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
@ -21,3 +21,33 @@ int GetProperty(const char* propertyName, const InfoForExtension* miscInfo)
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Param sentence: sentence received by NextHooker (UTF-16).
|
||||||
|
* Param miscInfo: pointer to start of singly linked list containing misc info about the sentence.
|
||||||
|
* Return value: whether the sentence was modified.
|
||||||
|
* NextHooker will display the sentence after all extensions have had a chance to process and/or modify it.
|
||||||
|
* THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE!
|
||||||
|
*/
|
||||||
|
bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Param sentence: pointer to sentence received by NextHooker (UTF-16).
|
||||||
|
* You should not modify this sentence. If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that.
|
||||||
|
* Param miscInfo: pointer to start of singly linked list containing misc info about the sentence.
|
||||||
|
* Return value: pointer to sentence NextHooker takes for future processing and display.
|
||||||
|
* Return 'sentence' unless you created a new sentence/buffer as mentioned above.
|
||||||
|
* NextHooker will display the sentence after all extensions have had a chance to process and/or modify it.
|
||||||
|
* THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE!
|
||||||
|
*/
|
||||||
|
extern "C" __declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentenceArr, const InfoForExtension* miscInfo)
|
||||||
|
{
|
||||||
|
std::wstring sentence(sentenceArr);
|
||||||
|
if (ProcessSentence(sentence, miscInfo))
|
||||||
|
{
|
||||||
|
wchar_t* newSentence = (wchar_t*)malloc((sentence.size() + 1) * sizeof(wchar_t*));
|
||||||
|
wcscpy(newSentence, sentence.c_str());
|
||||||
|
return newSentence;
|
||||||
|
}
|
||||||
|
else return sentenceArr;
|
||||||
|
}
|
@ -1,21 +1,8 @@
|
|||||||
#include "extensions.h"
|
#include "extensions.h"
|
||||||
|
|
||||||
extern "C"
|
bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo)
|
||||||
{
|
{
|
||||||
/**
|
if (GetProperty("hook address", miscInfo) == -1) return false;
|
||||||
* Param sentence: pointer to sentence received by NextHooker (UTF-16).
|
sentence += L"\r\n";
|
||||||
* You should not modify this sentence. If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that.
|
return true;
|
||||||
* Param miscInfo: pointer to start of singly linked list containing misc info about the sentence.
|
|
||||||
* Return value: pointer to sentence NextHooker takes for future processing and display.
|
|
||||||
* Return 'sentence' unless you created a new sentence/buffer as mentioned above.
|
|
||||||
* NextHooker will display the sentence after all extensions have had a chance to process and/or modify it.
|
|
||||||
* THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE!
|
|
||||||
*/
|
|
||||||
__declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentence, const InfoForExtension* miscInfo)
|
|
||||||
{
|
|
||||||
if (GetProperty("hook address", miscInfo) == -1) return sentence;
|
|
||||||
wchar_t* newSentence = (wchar_t*)malloc((wcslen(sentence) + 6) * sizeof(wchar_t));
|
|
||||||
swprintf(newSentence, wcslen(sentence) + 6, L"%s\r\n", sentence);
|
|
||||||
return newSentence;
|
|
||||||
}
|
|
||||||
}
|
}
|
@ -30,84 +30,69 @@ std::wstring GetTranslationUri(const wchar_t* text, unsigned int TKK)
|
|||||||
return std::wstring(L"/translate_a/single?client=t&dt=ld&dt=rm&dt=t&tk=") + std::to_wstring(a) + L"." + std::to_wstring(b) + L"&q=" + std::wstring(text);
|
return std::wstring(L"/translate_a/single?client=t&dt=ld&dt=rm&dt=t&tk=") + std::to_wstring(a) + L"." + std::to_wstring(b) + L"&q=" + std::wstring(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C"
|
bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo)
|
||||||
{
|
{
|
||||||
/**
|
static HINTERNET internet = NULL;
|
||||||
* Param sentence: pointer to sentence received by NextHooker (UTF-16).
|
if (!internet) internet = WinHttpOpen(L"Mozilla/5.0 NextHooker", WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, NULL, NULL, 0);
|
||||||
* You should not modify this sentence. If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that.
|
static unsigned int TKK = 0;
|
||||||
* Param miscInfo: pointer to start of singly linked list containing misc info about the sentence.
|
|
||||||
* Return value: pointer to sentence NextHooker takes for future processing and display.
|
std::wstring translation(L"");
|
||||||
* Return 'sentence' unless you created a new sentence/buffer as mentioned above.
|
|
||||||
* NextHooker will display the sentence after all extensions have had a chance to process and/or modify it.
|
if (GetProperty("hook address", miscInfo) == -1) return false;
|
||||||
* THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE!
|
|
||||||
*/
|
if (internet)
|
||||||
__declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentence, const InfoForExtension* miscInfo)
|
|
||||||
{
|
{
|
||||||
static HINTERNET internet = NULL;
|
if (!TKK)
|
||||||
if (!internet) internet = WinHttpOpen(L"Mozilla/5.0 NextHooker", WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, NULL, NULL, 0);
|
|
||||||
static unsigned int TKK = 0;
|
|
||||||
|
|
||||||
wchar_t error[] = L"Error while translating.";
|
|
||||||
std::wstring translation(L"");
|
|
||||||
const wchar_t* message = error;
|
|
||||||
|
|
||||||
if (wcslen(sentence) > 2000 || GetProperty("hook address", miscInfo) == -1) return sentence;
|
|
||||||
|
|
||||||
if (internet)
|
|
||||||
{
|
|
||||||
if (!TKK)
|
|
||||||
if (HINTERNET connection = WinHttpConnect(internet, L"translate.google.com", INTERNET_DEFAULT_HTTPS_PORT, 0))
|
|
||||||
{
|
|
||||||
if (HINTERNET request = WinHttpOpenRequest(connection, L"GET", L"/", NULL, NULL, NULL, WINHTTP_FLAG_SECURE))
|
|
||||||
{
|
|
||||||
if (WinHttpSendRequest(request, NULL, 0, NULL, 0, 0, NULL))
|
|
||||||
{
|
|
||||||
DWORD bytesRead;
|
|
||||||
char buffer[100000] = {}; // Google Translate page is ~64kb
|
|
||||||
WinHttpReceiveResponse(request, NULL);
|
|
||||||
WinHttpReadData(request, buffer, 100000, &bytesRead);
|
|
||||||
TKK = strtoll(strstr(buffer, "a\\x3d") + 5, nullptr, 10) + strtoll(strstr(buffer, "b\\x3d") + 5, nullptr, 10);
|
|
||||||
}
|
|
||||||
WinHttpCloseHandle(request);
|
|
||||||
}
|
|
||||||
WinHttpCloseHandle(connection);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (HINTERNET connection = WinHttpConnect(internet, L"translate.google.com", INTERNET_DEFAULT_HTTPS_PORT, 0))
|
if (HINTERNET connection = WinHttpConnect(internet, L"translate.google.com", INTERNET_DEFAULT_HTTPS_PORT, 0))
|
||||||
{
|
{
|
||||||
if (HINTERNET request = WinHttpOpenRequest(connection, L"GET", GetTranslationUri(sentence, TKK).c_str(), NULL, NULL, NULL, WINHTTP_FLAG_ESCAPE_DISABLE | WINHTTP_FLAG_SECURE))
|
if (HINTERNET request = WinHttpOpenRequest(connection, L"GET", L"/", NULL, NULL, NULL, WINHTTP_FLAG_SECURE))
|
||||||
{
|
{
|
||||||
if (WinHttpSendRequest(request, NULL, 0, NULL, 0, 0, NULL))
|
if (WinHttpSendRequest(request, NULL, 0, NULL, 0, 0, NULL))
|
||||||
{
|
{
|
||||||
DWORD bytesRead;
|
DWORD bytesRead;
|
||||||
char buffer[10000] = {};
|
char buffer[100000] = {}; // Google Translate page is ~64kb
|
||||||
WinHttpReceiveResponse(request, NULL);
|
WinHttpReceiveResponse(request, NULL);
|
||||||
WinHttpReadData(request, buffer, 10000, &bytesRead);
|
WinHttpReadData(request, buffer, 100000, &bytesRead);
|
||||||
// Response formatted as JSON: starts with '[[["'
|
TKK = strtoll(strstr(buffer, "a\\x3d") + 5, nullptr, 10) + strtoll(strstr(buffer, "b\\x3d") + 5, nullptr, 10);
|
||||||
if (buffer[0] == '[')
|
|
||||||
{
|
|
||||||
wchar_t wbuffer[10000] = {};
|
|
||||||
MultiByteToWideChar(CP_UTF8, 0, (char*)buffer, -1, wbuffer, 10000);
|
|
||||||
std::wstring response(wbuffer);
|
|
||||||
std::wregex translationFinder(L"\\[\"(.*?)\",[n\"]");
|
|
||||||
std::wsmatch results;
|
|
||||||
while (std::regex_search(response, results, translationFinder))
|
|
||||||
{
|
|
||||||
translation += std::wstring(results[1]) + L" ";
|
|
||||||
response = results.suffix().str();
|
|
||||||
}
|
|
||||||
for (auto& c : translation) if (c == L'\\') c = 0x200b;
|
|
||||||
message = translation.c_str();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
WinHttpCloseHandle(request);
|
WinHttpCloseHandle(request);
|
||||||
}
|
}
|
||||||
WinHttpCloseHandle(connection);
|
WinHttpCloseHandle(connection);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
wchar_t* newSentence = (wchar_t*)malloc((wcslen(sentence) + 3 + wcslen(message)) * sizeof(wchar_t));
|
if (HINTERNET connection = WinHttpConnect(internet, L"translate.google.com", INTERNET_DEFAULT_HTTPS_PORT, 0))
|
||||||
swprintf(newSentence, wcslen(sentence) + 3 + wcslen(message), L"%s%s%s", sentence, L"\r\n", message);
|
{
|
||||||
return newSentence;
|
if (HINTERNET request = WinHttpOpenRequest(connection, L"GET", GetTranslationUri(sentence.c_str(), TKK).c_str(), NULL, NULL, NULL, WINHTTP_FLAG_ESCAPE_DISABLE | WINHTTP_FLAG_SECURE))
|
||||||
|
{
|
||||||
|
if (WinHttpSendRequest(request, NULL, 0, NULL, 0, 0, NULL))
|
||||||
|
{
|
||||||
|
DWORD bytesRead;
|
||||||
|
char buffer[10000] = {};
|
||||||
|
WinHttpReceiveResponse(request, NULL);
|
||||||
|
WinHttpReadData(request, buffer, 10000, &bytesRead);
|
||||||
|
// Response formatted as JSON: starts with '[[["'
|
||||||
|
if (buffer[0] == '[')
|
||||||
|
{
|
||||||
|
wchar_t wbuffer[10000] = {};
|
||||||
|
MultiByteToWideChar(CP_UTF8, 0, (char*)buffer, -1, wbuffer, 10000);
|
||||||
|
std::wstring response(wbuffer);
|
||||||
|
std::wregex translationFinder(L"\\[\"(.*?)\",[n\"]");
|
||||||
|
std::wsmatch results;
|
||||||
|
while (std::regex_search(response, results, translationFinder))
|
||||||
|
{
|
||||||
|
translation += std::wstring(results[1]) + L" ";
|
||||||
|
response = results.suffix().str();
|
||||||
|
}
|
||||||
|
for (auto& c : translation) if (c == L'\\') c = 0x200b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
WinHttpCloseHandle(request);
|
||||||
|
}
|
||||||
|
WinHttpCloseHandle(connection);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (translation == L"") translation = L"Error while translating.";
|
||||||
|
sentence += L"\r\n" + translation;
|
||||||
|
return true;
|
||||||
}
|
}
|
@ -1,46 +1,60 @@
|
|||||||
#include "extensions.h"
|
#include "extensions.h"
|
||||||
|
#include <set>
|
||||||
|
#include <mutex>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cwctype>
|
|
||||||
|
|
||||||
std::wstring remove_side_spaces(const std::wstring& str)
|
bool RemoveRepeatedChars(std::wstring& sentence)
|
||||||
{
|
{
|
||||||
auto begin = std::find_if_not(str.begin(), str.end(), std::iswspace);
|
unsigned int repeatNumber = 0;
|
||||||
if (begin == str.end()) return L"";
|
wchar_t prevChar = sentence[0];
|
||||||
auto end = std::find_if_not(str.rbegin(), str.rend(), std::iswspace);
|
for (auto i : sentence)
|
||||||
return std::wstring(begin, end.base());
|
if (i == prevChar) repeatNumber++;
|
||||||
|
else break;
|
||||||
|
if (repeatNumber == 1) return false;
|
||||||
|
|
||||||
|
for (int i = 0; i < sentence.size(); i += repeatNumber)
|
||||||
|
for (int j = i; j < sentence.size(); ++j)
|
||||||
|
if (sentence[j] != sentence[i])
|
||||||
|
if ((j - i) % repeatNumber != 0) return false;
|
||||||
|
else break;
|
||||||
|
|
||||||
|
// Keeps every repeatNumber'th character (offsets 0, repeatNumber, 2*repeatNumber, ...) and removes the rest.
|
||||||
|
sentence.erase(std::remove_if(sentence.begin(), sentence.end(), [&](const wchar_t& c) {return (&c - &*sentence.begin()) % repeatNumber != 0; }), sentence.end());
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C"
|
bool RemoveCyclicRepeats(std::wstring& sentence)
|
||||||
{
|
{
|
||||||
/**
|
unsigned int realLength = 6; // If the first 6 characters appear later on, there's probably a repetition issue.
|
||||||
* Param sentence: pointer to sentence received by NextHooker (UTF-16).
|
if (sentence.size() < realLength) return false;
|
||||||
* You should not modify this sentence. If you want NextHooker to receive a modified sentence, copy it into your own buffer and return that.
|
wchar_t realSentence[2000] = {};
|
||||||
* Param miscInfo: pointer to start of singly linked list containing misc info about the sentence.
|
memcpy(realSentence, sentence.c_str(), realLength * sizeof(wchar_t));
|
||||||
* Return value: pointer to sentence NextHooker takes for future processing and display.
|
while (wcsstr(sentence.c_str() + realLength, realSentence))
|
||||||
* Return 'sentence' unless you created a new sentence/buffer as mentioned above.
|
|
||||||
* NextHooker will display the sentence after all extensions have had a chance to process and/or modify it.
|
|
||||||
* THIS FUNCTION MAY BE RUN SEVERAL TIMES CONCURRENTLY: PLEASE ENSURE THAT IT IS THREAD SAFE!
|
|
||||||
*/
|
|
||||||
__declspec(dllexport) const wchar_t* OnNewSentence(const wchar_t* sentence, const InfoForExtension* miscInfo)
|
|
||||||
{
|
{
|
||||||
std::wstring sentenceStr = remove_side_spaces(std::wstring(sentence));
|
realSentence[realLength] = sentence[realLength];
|
||||||
unsigned long repeatNumber = 0;
|
if (++realLength >= 2000) return false;
|
||||||
wchar_t prevChar = sentenceStr[0];
|
|
||||||
for (auto i : sentenceStr)
|
|
||||||
if (i == prevChar) repeatNumber++;
|
|
||||||
else break;
|
|
||||||
|
|
||||||
for (int i = 0; i < sentenceStr.size(); i += repeatNumber)
|
|
||||||
for (int j = i; j < sentenceStr.size(); ++j)
|
|
||||||
if (sentenceStr[j] != sentenceStr[i])
|
|
||||||
if ((j - i) % repeatNumber != 0) return sentence;
|
|
||||||
else break;
|
|
||||||
|
|
||||||
if (repeatNumber == 1) return sentence;
|
|
||||||
sentenceStr.erase(std::remove_if(sentenceStr.begin(), sentenceStr.end(), [&](const wchar_t& c) {return (&c - &*sentenceStr.begin()) % repeatNumber != 0; }), sentenceStr.end());
|
|
||||||
|
|
||||||
wchar_t* newSentence = (wchar_t*)malloc((sentenceStr.size() + 2) * sizeof(wchar_t));
|
|
||||||
wcscpy(newSentence, sentenceStr.c_str());
|
|
||||||
return newSentence;
|
|
||||||
}
|
}
|
||||||
|
if (realLength > 7)
|
||||||
|
{
|
||||||
|
sentence = std::wstring(realSentence);
|
||||||
|
RemoveCyclicRepeats(sentence);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool RemoveRepeatedSentences(std::wstring& sentence, int threadHandle)
|
||||||
|
{
|
||||||
|
static std::set<std::pair<int, std::wstring>> seenSentences;
|
||||||
|
static std::mutex m;
|
||||||
|
std::lock_guard<std::mutex> l(m);
|
||||||
|
if (seenSentences.count({ threadHandle, sentence }) != 0) throw std::exception();
|
||||||
|
seenSentences.insert({ threadHandle, sentence });
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ProcessSentence(std::wstring& sentence, const InfoForExtension* miscInfo)
|
||||||
|
{
|
||||||
|
if (GetProperty("hook address", miscInfo) == -1) return false;
|
||||||
|
return RemoveRepeatedChars(sentence) | RemoveCyclicRepeats(sentence) | RemoveRepeatedSentences(sentence, GetProperty("thread handle", miscInfo));
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user