From 86415fca102e00c66d72dfecfd80b781e41b728f Mon Sep 17 00:00:00 2001 From: Akash Mozumdar Date: Sun, 29 Mar 2020 20:55:12 -0600 Subject: [PATCH] all official apis now supported and performance improvements --- extensions/bingtranslate.cpp | 23 +++++++-- extensions/deepltranslate.cpp | 85 +++++++++++++-------------------- extensions/googletranslate.cpp | 50 ++++--------------- extensions/lua.cpp | 2 +- extensions/network.cpp | 65 +++++++++++++++++-------- extensions/network.h | 16 ++++--- extensions/translatewrapper.cpp | 25 +++++++--- include/common.h | 5 ++ 8 files changed, 142 insertions(+), 129 deletions(-) diff --git a/extensions/bingtranslate.cpp b/extensions/bingtranslate.cpp index ae0ce3c..7ee526d 100644 --- a/extensions/bingtranslate.cpp +++ b/extensions/bingtranslate.cpp @@ -4,9 +4,10 @@ extern const wchar_t* TRANSLATION_ERROR; -extern Synchronized translateTo; +extern Synchronized translateTo, apiKey; const char* TRANSLATION_PROVIDER = "Bing Translate"; +const char* GET_API_KEY_FROM = "https://www.microsoft.com/en-us/translator/business/trial/#get-started"; QStringList languages { "Afrikaans: af", @@ -80,15 +81,31 @@ QStringList languages }; bool translateSelectedOnly = false, rateLimitAll = true, rateLimitSelected = false, useCache = true; -int tokenCount = 30, tokenRestoreDelay = 60000; +int tokenCount = 30, tokenRestoreDelay = 60000, maxSentenceSize = 500; std::pair Translate(const std::wstring& text, SentenceInfo) { + if (!apiKey->empty()) + if (HttpRequest httpRequest{ + L"Mozilla/5.0 Textractor", + L"api.cognitive.microsofttranslator.com", + L"POST", + FormatString(L"/translate?api-version=3.0&to=%s", translateTo.Copy()).c_str(), + FormatString(R"([{"text":"%s"}])", JSON::Escape(text)), + FormatString(L"Content-Type: application/json; charset=UTF-8\r\nOcp-Apim-Subscription-Key:%s", apiKey.Copy()).c_str() + }) + { + // Response formatted as JSON: translation starts with text":" and ends with ","to + if (std::wsmatch results; std::regex_search(httpRequest.response, results, std::wregex(L"text\":\"(.+?)\",\""))) return { true, results[1] }; + else return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) }; + } + else return { false, FormatString(L"%s (code=%u)", TRANSLATION_ERROR, httpRequest.errorCode) }; + if (HttpRequest httpRequest{ L"Mozilla/5.0 Textractor", L"www.bing.com", L"POST", - FormatString(L"/ttranslatev3?fromLang=auto-detect&to=%s&text=%s", translateTo->c_str(), Escape(text)).c_str() + FormatString(L"/ttranslatev3?fromLang=auto-detect&to=%s&text=%s", translateTo.Copy(), Escape(text)).c_str() }) // Response formatted as JSON: translation starts with text":" and ends with ","to if (std::wsmatch results; std::regex_search(httpRequest.response, results, std::wregex(L"text\":\"(.+?)\",\""))) return { true, results[1] }; diff --git a/extensions/deepltranslate.cpp b/extensions/deepltranslate.cpp index 41b802b..a738627 100644 --- a/extensions/deepltranslate.cpp +++ b/extensions/deepltranslate.cpp @@ -6,11 +6,10 @@ extern const wchar_t* TRANSLATION_ERROR; extern const char* USE_PREV_SENTENCE_CONTEXT; -extern QSettings settings; -extern QFormLayout* display; -extern Synchronized translateTo; +extern Synchronized translateTo, apiKey; const char* TRANSLATION_PROVIDER = "DeepL Translate"; +const char* GET_API_KEY_FROM = "https://www.deepl.com/pro.html"; QStringList languages { "Chinese (simplified): ZH", @@ -27,45 +26,31 @@ QStringList languages }; bool translateSelectedOnly = true, rateLimitAll = true, rateLimitSelected = true, useCache = false; -int tokenCount = 10, tokenRestoreDelay = 60000; +int tokenCount = 10, tokenRestoreDelay = 60000, maxSentenceSize = 500; const wchar_t* accept[] = { L"*/*", nullptr }; - Synchronized LMTBID; -bool useContext = true; -Synchronized> context; - -BOOL WINAPI DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) -{ - switch (ul_reason_for_call) - { - case DLL_PROCESS_ATTACH: - { - auto checkbox = new QCheckBox; - checkbox->setChecked(useContext); - display->addRow(USE_PREV_SENTENCE_CONTEXT, checkbox); - QObject::connect(checkbox, &QCheckBox::clicked, [](bool checked) { settings.setValue(USE_PREV_SENTENCE_CONTEXT, useContext = checked); }); - } - break; - case DLL_PROCESS_DETACH: - { - } - break; - } - return TRUE; -} - std::pair Translate(const std::wstring& text, SentenceInfo sentenceInfo) { + if (!apiKey->empty()) + if (HttpRequest httpRequest{ + L"Mozilla/5.0 Textractor", + L"api.deepl.com", + L"POST", + L"/v2/translate", + FormatString("text=%S&auth_key=%S&target_lang=%S", Escape(text), apiKey.Copy(), translateTo.Copy()), + L"Content-Type: application/x-www-form-urlencoded" + }) + // Response formatted as JSON: translation starts with text":" and ends with "}] + if (std::wsmatch results; std::regex_search(httpRequest.response, results, std::wregex(L"text\":\"(.+?)\"\\}\\]"))) return { true, results[1] }; + else return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) }; + else return { false, FormatString(L"%s (code=%u)", TRANSLATION_ERROR, httpRequest.errorCode) }; + // the following code was reverse engineered from the DeepL website; it's as close as I could make it but I'm not sure what parts of this could be removed and still have it work - int64_t r = _time64(nullptr), n = std::count(text.begin(), text.end(), L'i') + 1LL; - static std::atomic id = 10000 * std::uniform_int_distribution(0, 9999)(std::mt19937(std::random_device()())); - std::string jsonText; - for (auto ch : WideStringToString(text)) - if (ch == '"') jsonText += "\\\""; - else jsonText += ch; - // user_preferred_langs? what should preferred_num_beans and priority be? does timestamp do anything? other translation quality options? + int64_t r = _time64(nullptr), n = std::count(text.begin(), text.end(), L'i') + 1; + int id = 10000 * std::uniform_int_distribution(0, 9999)(std::mt19937(std::random_device()())); + // user_preferred_langs? what should priority be? does timestamp do anything? other translation quality options? auto body = FormatString(R"( { "id": %d, @@ -75,39 +60,35 @@ std::pair Translate(const std::wstring& text, SentenceInfo s "priority": -1, "timestamp": %lld, "lang": { - "source_lang_user_selected": "auto", - "target_lang": "%s" + "target_lang": "%S", + "source_lang_user_selected": "auto" }, "jobs": [{ - "kind": "default", - "preferred_num_beams": 4, - "quality": "fast", - "raw_en_context_after": [], "raw_en_sentence": "%s", - "raw_en_context_before": [%s] + "raw_en_context_before": [], + "kind": "default", + "preferred_num_beams": 1, + "quality": "fast", + "raw_en_context_after": [] }] } } - )", ++id, r + (n - r % n), WideStringToString(translateTo->c_str()), jsonText, useContext ? WideStringToString(context->operator[](sentenceInfo["text number"])) : ""); - context->insert_or_assign(sentenceInfo["text number"], L'"' + text + L'"'); + )", ++id, r + (n - r % n), translateTo.Copy(), JSON::Escape(text)); // missing accept-encoding header since it fucks up HttpRequest - std::wstring headers = L"Host: www2.deepl.com\r\nAccept-Language: en-US,en;q=0.5\r\nContent-type: text/plain\r\nOrigin: https://www.deepl.com\r\nTE: Trailers" + LMTBID.Acquire().contents; + std::wstring headers = L"Host: www2.deepl.com\r\nAccept-Language: en-US,en;q=0.5\r\nContent-type: text/plain; charset=utf-8\r\nOrigin: https://www.deepl.com\r\nTE: Trailers" + LMTBID.Acquire().contents; if (HttpRequest httpRequest{ L"Mozilla/5.0 Textractor", L"www2.deepl.com", L"POST", L"/jsonrpc", + body, + headers.c_str(), + L"https://www.deepl.com/translator", WINHTTP_FLAG_SECURE, NULL, - L"https://www.deepl.com/translator", - accept, - headers.c_str(), - body.data(), - body.size() + accept }) { - auto LMTBID = httpRequest.headers.find(L"LMTBID="), end = httpRequest.headers.find(L';', LMTBID); // not sure if this cookie does anything - if (LMTBID != std::wstring::npos && end != std::wstring::npos) ::LMTBID->assign(L"\r\nCookie: " + httpRequest.headers.substr(LMTBID, end - LMTBID)); // Response formatted as JSON: translation starts with preprocessed_sentence":" and ends with "," if (std::wsmatch results; std::regex_search(httpRequest.response, results, std::wregex(L"postprocessed_sentence\":\"(.+?)\",\""))) return { true, results[1] }; else return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) }; diff --git a/extensions/googletranslate.cpp b/extensions/googletranslate.cpp index a11376b..943f08b 100644 --- a/extensions/googletranslate.cpp +++ b/extensions/googletranslate.cpp @@ -4,13 +4,11 @@ #include extern const wchar_t* TRANSLATION_ERROR; -extern const char* API_KEY; -extern QFormLayout* display; -extern QSettings settings; -extern Synchronized translateTo; +extern Synchronized translateTo, apiKey; const char* TRANSLATION_PROVIDER = "Google Translate"; +const char* GET_API_KEY_FROM = "https://codelabs.developers.google.com/codelabs/cloud-translation-intro"; QStringList languages { "Afrikaans: af", @@ -125,43 +123,16 @@ QStringList languages }; bool translateSelectedOnly = false, rateLimitAll = true, rateLimitSelected = false, useCache = true; -int tokenCount = 30, tokenRestoreDelay = 60000; - -Synchronized key; +int tokenCount = 30, tokenRestoreDelay = 60000, maxSentenceSize = 500; unsigned TKK = 0; -BOOL WINAPI DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) -{ - switch (ul_reason_for_call) - { - case DLL_PROCESS_ATTACH: - { - auto keyInput = new QLineEdit(settings.value(API_KEY).toString()); - key->assign(S(keyInput->text())); - QObject::connect(keyInput, &QLineEdit::textChanged, [](QString key) { settings.setValue(API_KEY, S(::key->assign(S(key)))); }); - display->addRow(API_KEY, keyInput); - auto googleCloudInfo = new QLabel( - "https://codelabs.developers.google.com/codelabs/cloud-translation-intro" - ); - googleCloudInfo->setOpenExternalLinks(true); - display->addRow(googleCloudInfo); - } - break; - case DLL_PROCESS_DETACH: - { - } - break; - } - return TRUE; -} - std::wstring GetTranslationUri(const std::wstring& text) { // If no TKK available, use this uri. Can't use too much or google will detect unauthorized access - if (!TKK) return FormatString(L"/translate_a/single?client=gtx&dt=ld&dt=rm&dt=t&tl=%s&q=%s", translateTo->c_str(), text); + if (!TKK) return FormatString(L"/translate_a/single?client=gtx&dt=ld&dt=rm&dt=t&tl=%s&q=%s", translateTo.Copy(), Escape(text)); - // Artikash 8/19/2018: reverse engineered from translate.google.com + // reverse engineered from translate.google.com std::wstring escapedText; unsigned a = time(NULL) / 3600, b = a; // the first part of TKK for (unsigned char ch : WideStringToString(text)) @@ -177,7 +148,7 @@ std::wstring GetTranslationUri(const std::wstring& text) a ^= TKK; a %= 1000000; - return FormatString(L"/translate_a/single?client=webapp&dt=ld&dt=rm&dt=t&sl=auto&tl=%s&tk=%u.%u&q=%s", translateTo->c_str(), a, a ^ b, escapedText); + return FormatString(L"/translate_a/single?client=webapp&dt=ld&dt=rm&dt=t&sl=auto&tl=%s&tk=%u.%u&q=%s", translateTo.Copy(), a, a ^ b, escapedText); } bool IsHash(const std::wstring& result) @@ -187,13 +158,13 @@ bool IsHash(const std::wstring& result) std::pair Translate(const std::wstring& text, SentenceInfo) { - if (!key->empty()) - { + if (!apiKey->empty()) if (HttpRequest httpRequest{ L"Mozilla/5.0 Textractor", L"translation.googleapis.com", - L"GET", - FormatString(L"/language/translate/v2?format=text&q=%s&target=%s&key=%s", Escape(text), translateTo->c_str(), key->c_str()).c_str() + L"POST", + FormatString(L"/language/translate/v2?format=text&target=%s&key=%s", translateTo.Copy(), apiKey.Copy()).c_str(), + FormatString(R"({"q":["%s"]})", JSON::Escape(text)) }) { // Response formatted as JSON: starts with "translatedText": " and translation is enclosed in quotes followed by a comma @@ -201,7 +172,6 @@ std::pair Translate(const std::wstring& text, SentenceInfo) return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) }; } else return { false, FormatString(L"%s (code=%u)", TRANSLATION_ERROR, httpRequest.errorCode) }; - } if (!TKK) if (HttpRequest httpRequest{ L"Mozilla/5.0 Textractor", L"translate.google.com", L"GET", L"/" }) diff --git a/extensions/lua.cpp b/extensions/lua.cpp index 5af8012..c374527 100644 --- a/extensions/lua.cpp +++ b/extensions/lua.cpp @@ -91,7 +91,7 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo) { revCount = ::revCount; luaL_dostring(L, "ProcessSentence = nil"); - if (luaL_dostring(L, script->c_str()) != LUA_OK) + if (luaL_dostring(L, script.Copy().c_str()) != LUA_OK) { sentence += L"\n" + FormatString(LUA_ERROR, StringToWideString(lua_tolstring(L, 1, nullptr))); lua_settop(L, 0); diff --git a/extensions/network.cpp b/extensions/network.cpp index a075ded..e6788e6 100644 --- a/extensions/network.cpp +++ b/extensions/network.cpp @@ -5,13 +5,12 @@ HttpRequest::HttpRequest( const wchar_t* serverName, const wchar_t* action, const wchar_t* objectName, + std::string body, + const wchar_t* headers, + const wchar_t* referrer, DWORD requestFlags, const wchar_t* httpVersion, - const wchar_t* referrer, - const wchar_t** acceptTypes, - const wchar_t* headers, - void* body, - DWORD bodyLength + const wchar_t** acceptTypes ) { static std::atomic internet = NULL; @@ -19,13 +18,13 @@ HttpRequest::HttpRequest( if (internet) if (InternetHandle connection = WinHttpConnect(internet, serverName, INTERNET_DEFAULT_HTTPS_PORT, 0)) if (InternetHandle request = WinHttpOpenRequest(connection, action, objectName, httpVersion, referrer, acceptTypes, requestFlags)) - if (WinHttpSendRequest(request, headers, -1UL, body, bodyLength, bodyLength, NULL)) + if (WinHttpSendRequest(request, headers, -1UL, body.empty() ? NULL : body.data(), body.size(), body.size(), NULL)) { - WinHttpReceiveResponse(request, NULL); - DWORD size = 0; - WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, WINHTTP_HEADER_NAME_BY_INDEX, NULL, &size, WINHTTP_NO_HEADER_INDEX); - this->headers.resize(size); - WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, WINHTTP_HEADER_NAME_BY_INDEX, this->headers.data(), &size, WINHTTP_NO_HEADER_INDEX); + //WinHttpReceiveResponse(request, NULL); + //DWORD size = 0; + //WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, WINHTTP_HEADER_NAME_BY_INDEX, NULL, &size, WINHTTP_NO_HEADER_INDEX); + //this->headers.resize(size); + //WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, WINHTTP_HEADER_NAME_BY_INDEX, this->headers.data(), &size, WINHTTP_NO_HEADER_INDEX); std::string data; DWORD availableSize, downloadedSize; do @@ -54,18 +53,44 @@ std::wstring Escape(const std::wstring& text) return escaped; } -void Unescape(std::wstring& text) +namespace JSON { - for (int i = 0; i < text.size(); ++i) + void Unescape(std::wstring& text) { - if (text[i] == L'\\') + for (int i = 0; i < text.size(); ++i) { - text[i] = 0; - if (text[i + 1] == L'r') text[i + 1] = 0; // for some reason \r gets displayed as a newline - if (text[i + 1] == L'n') text[i + 1] = L'\n'; - if (text[i + 1] == L't') text[i + 1] = L'\t'; - if (text[i + 1] == L'\\') ++i; + if (text[i] == L'\\') + { + text[i] = 0; + if (text[i + 1] == L'r') text[i + 1] = 0; // for some reason \r gets displayed as a newline + if (text[i + 1] == L'n') text[i + 1] = L'\n'; + if (text[i + 1] == L't') text[i + 1] = L'\t'; + if (text[i + 1] == L'\\') ++i; + } } + text.erase(std::remove(text.begin(), text.end(), 0), text.end()); + } + + std::string Escape(const std::wstring& text) + { + std::string escaped = WideStringToString(text); + int oldSize = escaped.size(); + escaped.resize(escaped.size() + std::count_if(escaped.begin(), escaped.end(), [](char ch) { return ch == '\n' || ch == '\r' || ch == '\t' || ch == '\\' || ch == '"'; })); + auto out = escaped.rbegin(); + for (int i = oldSize - 1; i >= 0; --i) + { + if (escaped[i] == '\n') *out++ = 'n'; + else if (escaped[i] == '\t') *out++ = 't'; + else if (escaped[i] == '\r') *out++ = 'r'; + else if (escaped[i] == '\\' || escaped[i] == '"') *out++ = escaped[i]; + else + { + *out++ = escaped[i]; + continue; + } + *out++ = '\\'; + } + escaped.erase(std::remove_if(escaped.begin(), escaped.end(), [](unsigned char ch) { return ch < 0x20; }), escaped.end()); + return escaped; } - text.erase(std::remove(text.begin(), text.end(), 0), text.end()); } diff --git a/extensions/network.h b/extensions/network.h index 9546cfa..b970700 100644 --- a/extensions/network.h +++ b/extensions/network.h @@ -12,13 +12,12 @@ struct HttpRequest const wchar_t* serverName, const wchar_t* action, const wchar_t* objectName, + std::string body = "", + const wchar_t* headers = NULL, + const wchar_t* referrer = NULL, DWORD requestFlags = WINHTTP_FLAG_SECURE | WINHTTP_FLAG_ESCAPE_DISABLE, const wchar_t* httpVersion = NULL, - const wchar_t* referrer = NULL, - const wchar_t** acceptTypes = NULL, - const wchar_t* headers = NULL, - void* body = NULL, - DWORD bodyLength = 0 + const wchar_t** acceptTypes = NULL ); operator bool() { return errorCode == ERROR_SUCCESS; } @@ -30,4 +29,9 @@ struct HttpRequest }; std::wstring Escape(const std::wstring& text); -void Unescape(std::wstring& text); + +namespace JSON +{ + void Unescape(std::wstring& text); + std::string Escape(const std::wstring& text); +} diff --git a/extensions/translatewrapper.cpp b/extensions/translatewrapper.cpp index 5a3868c..7eb3810 100644 --- a/extensions/translatewrapper.cpp +++ b/extensions/translatewrapper.cpp @@ -14,12 +14,15 @@ extern const char* RATE_LIMIT_SELECTED_THREAD; extern const char* USE_TRANS_CACHE; extern const char* RATE_LIMIT_TOKEN_COUNT; extern const char* RATE_LIMIT_TOKEN_RESTORE_DELAY; +extern const char* MAX_SENTENCE_SIZE; +extern const char* API_KEY; extern const wchar_t* TOO_MANY_TRANS_REQUESTS; extern const char* TRANSLATION_PROVIDER; +extern const char* GET_API_KEY_FROM; extern QStringList languages; extern bool translateSelectedOnly, rateLimitAll, rateLimitSelected, useCache; -extern int tokenCount, tokenRestoreDelay; +extern int tokenCount, tokenRestoreDelay, maxSentenceSize; std::pair Translate(const std::wstring& text, SentenceInfo sentenceInfo); const char* LANGUAGE = u8"Language"; @@ -27,7 +30,7 @@ const std::string TRANSLATION_CACHE_FILE = FormatString("%s Cache.txt", TRANSLAT QFormLayout* display; QSettings settings = openSettings(); -Synchronized translateTo = L"en"; +Synchronized translateTo = L"en", apiKey; Synchronized> translationCache; int savedSize; @@ -77,6 +80,7 @@ public: for (auto [value, label] : Array{ { tokenCount, RATE_LIMIT_TOKEN_COUNT }, { tokenRestoreDelay, RATE_LIMIT_TOKEN_RESTORE_DELAY }, + { maxSentenceSize, MAX_SENTENCE_SIZE }, }) { value = settings.value(label, value).toInt(); @@ -86,6 +90,15 @@ public: display->addRow(label, spinBox); connect(spinBox, qOverload(&QSpinBox::valueChanged), [label, &value](int newValue) { settings.setValue(label, value = newValue); }); } + if (GET_API_KEY_FROM) + { + auto keyInput = new QLineEdit(settings.value(API_KEY).toString()); + apiKey->assign(S(keyInput->text())); + QObject::connect(keyInput, &QLineEdit::textChanged, [](QString key) { settings.setValue(API_KEY, S(apiKey->assign(S(key)))); }); + auto keyLabel = new QLabel(QString("%2").arg(GET_API_KEY_FROM, API_KEY), this); + keyLabel->setOpenExternalLinks(true); + display->addRow(keyLabel, keyInput); + } setWindowTitle(TRANSLATION_PROVIDER); QMetaObject::invokeMethod(this, &QWidget::show, Qt::QueuedConnection); @@ -115,7 +128,7 @@ private: bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo) { - if (sentenceInfo["text number"] == 0) return false; + if (sentenceInfo["text number"] == 0 || sentence.size() > maxSentenceSize) return false; static class { @@ -146,11 +159,9 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo) if (cache) translationCache->try_emplace(sentence, translation); if (cache && translationCache->size() > savedSize + 50) SaveCache(); - Unescape(translation); + JSON::Unescape(translation); sentence += L"\n" + translation; return true; } -TEST( - assert(Translate(L"こんにちは").second.find(L"ello") != std::wstring::npos) -); +TEST(assert(Translate(L"こんにちは").second.find(L"ello") != std::wstring::npos)); diff --git a/include/common.h b/include/common.h index f62f28e..10f97e7 100644 --- a/include/common.h +++ b/include/common.h @@ -78,6 +78,11 @@ public: Locker Acquire() { return { std::unique_lock(m), contents }; } Locker operator->() { return Acquire(); } + T Copy() + { + return Acquire().contents; + } + private: T contents; M m;