all official apis now supported and performance improvements

This commit is contained in:
Akash Mozumdar 2020-03-29 20:55:12 -06:00
parent ecab473482
commit 86415fca10
8 changed files with 142 additions and 129 deletions

View File

@ -4,9 +4,10 @@
extern const wchar_t* TRANSLATION_ERROR;
extern Synchronized<std::wstring> translateTo;
extern Synchronized<std::wstring> translateTo, apiKey;
const char* TRANSLATION_PROVIDER = "Bing Translate";
const char* GET_API_KEY_FROM = "https://www.microsoft.com/en-us/translator/business/trial/#get-started";
QStringList languages
{
"Afrikaans: af",
@ -80,15 +81,31 @@ QStringList languages
};
bool translateSelectedOnly = false, rateLimitAll = true, rateLimitSelected = false, useCache = true;
int tokenCount = 30, tokenRestoreDelay = 60000;
int tokenCount = 30, tokenRestoreDelay = 60000, maxSentenceSize = 500;
std::pair<bool, std::wstring> Translate(const std::wstring& text, SentenceInfo)
{
if (!apiKey->empty())
if (HttpRequest httpRequest{
L"Mozilla/5.0 Textractor",
L"api.cognitive.microsofttranslator.com",
L"POST",
FormatString(L"/translate?api-version=3.0&to=%s", translateTo.Copy()).c_str(),
FormatString(R"([{"text":"%s"}])", JSON::Escape(text)),
FormatString(L"Content-Type: application/json; charset=UTF-8\r\nOcp-Apim-Subscription-Key:%s", apiKey.Copy()).c_str()
})
{
// Response formatted as JSON: translation starts with text":" and ends with ","to
if (std::wsmatch results; std::regex_search(httpRequest.response, results, std::wregex(L"text\":\"(.+?)\",\""))) return { true, results[1] };
else return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) };
}
else return { false, FormatString(L"%s (code=%u)", TRANSLATION_ERROR, httpRequest.errorCode) };
if (HttpRequest httpRequest{
L"Mozilla/5.0 Textractor",
L"www.bing.com",
L"POST",
FormatString(L"/ttranslatev3?fromLang=auto-detect&to=%s&text=%s", translateTo->c_str(), Escape(text)).c_str()
FormatString(L"/ttranslatev3?fromLang=auto-detect&to=%s&text=%s", translateTo.Copy(), Escape(text)).c_str()
})
// Response formatted as JSON: translation starts with text":" and ends with ","to
if (std::wsmatch results; std::regex_search(httpRequest.response, results, std::wregex(L"text\":\"(.+?)\",\""))) return { true, results[1] };

View File

@ -6,11 +6,10 @@
extern const wchar_t* TRANSLATION_ERROR;
extern const char* USE_PREV_SENTENCE_CONTEXT;
extern QSettings settings;
extern QFormLayout* display;
extern Synchronized<std::wstring> translateTo;
extern Synchronized<std::wstring> translateTo, apiKey;
const char* TRANSLATION_PROVIDER = "DeepL Translate";
const char* GET_API_KEY_FROM = "https://www.deepl.com/pro.html";
QStringList languages
{
"Chinese (simplified): ZH",
@ -27,45 +26,31 @@ QStringList languages
};
bool translateSelectedOnly = true, rateLimitAll = true, rateLimitSelected = true, useCache = false;
int tokenCount = 10, tokenRestoreDelay = 60000;
int tokenCount = 10, tokenRestoreDelay = 60000, maxSentenceSize = 500;
const wchar_t* accept[] = { L"*/*", nullptr };
Synchronized<std::wstring> LMTBID;
bool useContext = true;
Synchronized<std::unordered_map<int64_t, std::wstring>> context;
// DLL entry point. On process attach it adds a checkbox row (labelled
// USE_PREV_SENTENCE_CONTEXT) to the settings form; clicking the checkbox updates
// useContext and stores the new value in settings. Every other notification is a no-op.
// Always returns TRUE.
BOOL WINAPI DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
	if (ul_reason_for_call == DLL_PROCESS_ATTACH)
	{
		auto contextToggle = new QCheckBox;
		contextToggle->setChecked(useContext);
		display->addRow(USE_PREV_SENTENCE_CONTEXT, contextToggle);
		QObject::connect(contextToggle, &QCheckBox::clicked, [](bool enabled)
		{
			// Update the live flag first, then persist the same value.
			useContext = enabled;
			settings.setValue(USE_PREV_SENTENCE_CONTEXT, useContext);
		});
	}
	return TRUE;
}
std::pair<bool, std::wstring> Translate(const std::wstring& text, SentenceInfo sentenceInfo)
{
if (!apiKey->empty())
if (HttpRequest httpRequest{
L"Mozilla/5.0 Textractor",
L"api.deepl.com",
L"POST",
L"/v2/translate",
FormatString("text=%S&auth_key=%S&target_lang=%S", Escape(text), apiKey.Copy(), translateTo.Copy()),
L"Content-Type: application/x-www-form-urlencoded"
})
// Response formatted as JSON: translation starts with text":" and ends with "}]
if (std::wsmatch results; std::regex_search(httpRequest.response, results, std::wregex(L"text\":\"(.+?)\"\\}\\]"))) return { true, results[1] };
else return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) };
else return { false, FormatString(L"%s (code=%u)", TRANSLATION_ERROR, httpRequest.errorCode) };
// the following code was reverse engineered from the DeepL website; it's as close as I could make it but I'm not sure what parts of this could be removed and still have it work
int64_t r = _time64(nullptr), n = std::count(text.begin(), text.end(), L'i') + 1LL;
static std::atomic<int> id = 10000 * std::uniform_int_distribution(0, 9999)(std::mt19937(std::random_device()()));
std::string jsonText;
for (auto ch : WideStringToString(text))
if (ch == '"') jsonText += "\\\"";
else jsonText += ch;
// user_preferred_langs? what should preferred_num_beans and priority be? does timestamp do anything? other translation quality options?
int64_t r = _time64(nullptr), n = std::count(text.begin(), text.end(), L'i') + 1;
int id = 10000 * std::uniform_int_distribution(0, 9999)(std::mt19937(std::random_device()()));
// user_preferred_langs? what should priority be? does timestamp do anything? other translation quality options?
auto body = FormatString(R"(
{
"id": %d,
@ -75,39 +60,35 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, SentenceInfo s
"priority": -1,
"timestamp": %lld,
"lang": {
"source_lang_user_selected": "auto",
"target_lang": "%s"
"target_lang": "%S",
"source_lang_user_selected": "auto"
},
"jobs": [{
"kind": "default",
"preferred_num_beams": 4,
"quality": "fast",
"raw_en_context_after": [],
"raw_en_sentence": "%s",
"raw_en_context_before": [%s]
"raw_en_context_before": [],
"kind": "default",
"preferred_num_beams": 1,
"quality": "fast",
"raw_en_context_after": []
}]
}
}
)", ++id, r + (n - r % n), WideStringToString(translateTo->c_str()), jsonText, useContext ? WideStringToString(context->operator[](sentenceInfo["text number"])) : "");
context->insert_or_assign(sentenceInfo["text number"], L'"' + text + L'"');
)", ++id, r + (n - r % n), translateTo.Copy(), JSON::Escape(text));
// the Accept-Encoding header is deliberately omitted because it breaks HttpRequest
std::wstring headers = L"Host: www2.deepl.com\r\nAccept-Language: en-US,en;q=0.5\r\nContent-type: text/plain\r\nOrigin: https://www.deepl.com\r\nTE: Trailers" + LMTBID.Acquire().contents;
std::wstring headers = L"Host: www2.deepl.com\r\nAccept-Language: en-US,en;q=0.5\r\nContent-type: text/plain; charset=utf-8\r\nOrigin: https://www.deepl.com\r\nTE: Trailers" + LMTBID.Acquire().contents;
if (HttpRequest httpRequest{
L"Mozilla/5.0 Textractor",
L"www2.deepl.com",
L"POST",
L"/jsonrpc",
body,
headers.c_str(),
L"https://www.deepl.com/translator",
WINHTTP_FLAG_SECURE,
NULL,
L"https://www.deepl.com/translator",
accept,
headers.c_str(),
body.data(),
body.size()
accept
})
{
auto LMTBID = httpRequest.headers.find(L"LMTBID="), end = httpRequest.headers.find(L';', LMTBID); // not sure if this cookie does anything
if (LMTBID != std::wstring::npos && end != std::wstring::npos) ::LMTBID->assign(L"\r\nCookie: " + httpRequest.headers.substr(LMTBID, end - LMTBID));
// Response formatted as JSON: translation starts with postprocessed_sentence":" and ends with ","
if (std::wsmatch results; std::regex_search(httpRequest.response, results, std::wregex(L"postprocessed_sentence\":\"(.+?)\",\""))) return { true, results[1] };
else return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) };

View File

@ -4,13 +4,11 @@
#include <ctime>
extern const wchar_t* TRANSLATION_ERROR;
extern const char* API_KEY;
extern QFormLayout* display;
extern QSettings settings;
extern Synchronized<std::wstring> translateTo;
extern Synchronized<std::wstring> translateTo, apiKey;
const char* TRANSLATION_PROVIDER = "Google Translate";
const char* GET_API_KEY_FROM = "https://codelabs.developers.google.com/codelabs/cloud-translation-intro";
QStringList languages
{
"Afrikaans: af",
@ -125,43 +123,16 @@ QStringList languages
};
bool translateSelectedOnly = false, rateLimitAll = true, rateLimitSelected = false, useCache = true;
int tokenCount = 30, tokenRestoreDelay = 60000;
Synchronized<std::wstring> key;
int tokenCount = 30, tokenRestoreDelay = 60000, maxSentenceSize = 500;
unsigned TKK = 0;
// DLL entry point. On process attach it builds the API key UI: a line edit pre-filled
// from settings (whose text is mirrored into the global key and written back to settings
// on every change) followed by a row with a clickable link to the Google Cloud
// Translation codelab. Every other notification is a no-op. Always returns TRUE.
BOOL WINAPI DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
	if (ul_reason_for_call != DLL_PROCESS_ATTACH) return TRUE;

	// API key input: seed the global key from the saved value, then keep both in sync.
	auto apiKeyEdit = new QLineEdit(settings.value(API_KEY).toString());
	key->assign(S(apiKeyEdit->text()));
	QObject::connect(apiKeyEdit, &QLineEdit::textChanged, [](QString newKey)
	{
		settings.setValue(API_KEY, S(::key->assign(S(newKey))));
	});
	display->addRow(API_KEY, apiKeyEdit);

	// Link telling the user where an API key can be obtained.
	auto infoLink = new QLabel(
		"<a href=\"https://codelabs.developers.google.com/codelabs/cloud-translation-intro\">https://codelabs.developers.google.com/codelabs/cloud-translation-intro</a>"
	);
	infoLink->setOpenExternalLinks(true);
	display->addRow(infoLink);

	return TRUE;
}
std::wstring GetTranslationUri(const std::wstring& text)
{
// If no TKK available, use this uri. Can't use too much or google will detect unauthorized access
if (!TKK) return FormatString(L"/translate_a/single?client=gtx&dt=ld&dt=rm&dt=t&tl=%s&q=%s", translateTo->c_str(), text);
if (!TKK) return FormatString(L"/translate_a/single?client=gtx&dt=ld&dt=rm&dt=t&tl=%s&q=%s", translateTo.Copy(), Escape(text));
// Artikash 8/19/2018: reverse engineered from translate.google.com
// reverse engineered from translate.google.com
std::wstring escapedText;
unsigned a = time(NULL) / 3600, b = a; // the first part of TKK
for (unsigned char ch : WideStringToString(text))
@ -177,7 +148,7 @@ std::wstring GetTranslationUri(const std::wstring& text)
a ^= TKK;
a %= 1000000;
return FormatString(L"/translate_a/single?client=webapp&dt=ld&dt=rm&dt=t&sl=auto&tl=%s&tk=%u.%u&q=%s", translateTo->c_str(), a, a ^ b, escapedText);
return FormatString(L"/translate_a/single?client=webapp&dt=ld&dt=rm&dt=t&sl=auto&tl=%s&tk=%u.%u&q=%s", translateTo.Copy(), a, a ^ b, escapedText);
}
bool IsHash(const std::wstring& result)
@ -187,13 +158,13 @@ bool IsHash(const std::wstring& result)
std::pair<bool, std::wstring> Translate(const std::wstring& text, SentenceInfo)
{
if (!key->empty())
{
if (!apiKey->empty())
if (HttpRequest httpRequest{
L"Mozilla/5.0 Textractor",
L"translation.googleapis.com",
L"GET",
FormatString(L"/language/translate/v2?format=text&q=%s&target=%s&key=%s", Escape(text), translateTo->c_str(), key->c_str()).c_str()
L"POST",
FormatString(L"/language/translate/v2?format=text&target=%s&key=%s", translateTo.Copy(), apiKey.Copy()).c_str(),
FormatString(R"({"q":["%s"]})", JSON::Escape(text))
})
{
// Response formatted as JSON: starts with "translatedText": " and translation is enclosed in quotes followed by a comma
@ -201,7 +172,6 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, SentenceInfo)
return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) };
}
else return { false, FormatString(L"%s (code=%u)", TRANSLATION_ERROR, httpRequest.errorCode) };
}
if (!TKK)
if (HttpRequest httpRequest{ L"Mozilla/5.0 Textractor", L"translate.google.com", L"GET", L"/" })

View File

@ -91,7 +91,7 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
{
revCount = ::revCount;
luaL_dostring(L, "ProcessSentence = nil");
if (luaL_dostring(L, script->c_str()) != LUA_OK)
if (luaL_dostring(L, script.Copy().c_str()) != LUA_OK)
{
sentence += L"\n" + FormatString(LUA_ERROR, StringToWideString(lua_tolstring(L, 1, nullptr)));
lua_settop(L, 0);

View File

@ -5,13 +5,12 @@ HttpRequest::HttpRequest(
const wchar_t* serverName,
const wchar_t* action,
const wchar_t* objectName,
std::string body,
const wchar_t* headers,
const wchar_t* referrer,
DWORD requestFlags,
const wchar_t* httpVersion,
const wchar_t* referrer,
const wchar_t** acceptTypes,
const wchar_t* headers,
void* body,
DWORD bodyLength
const wchar_t** acceptTypes
)
{
static std::atomic<HINTERNET> internet = NULL;
@ -19,13 +18,13 @@ HttpRequest::HttpRequest(
if (internet)
if (InternetHandle connection = WinHttpConnect(internet, serverName, INTERNET_DEFAULT_HTTPS_PORT, 0))
if (InternetHandle request = WinHttpOpenRequest(connection, action, objectName, httpVersion, referrer, acceptTypes, requestFlags))
if (WinHttpSendRequest(request, headers, -1UL, body, bodyLength, bodyLength, NULL))
if (WinHttpSendRequest(request, headers, -1UL, body.empty() ? NULL : body.data(), body.size(), body.size(), NULL))
{
WinHttpReceiveResponse(request, NULL);
DWORD size = 0;
WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, WINHTTP_HEADER_NAME_BY_INDEX, NULL, &size, WINHTTP_NO_HEADER_INDEX);
this->headers.resize(size);
WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, WINHTTP_HEADER_NAME_BY_INDEX, this->headers.data(), &size, WINHTTP_NO_HEADER_INDEX);
//WinHttpReceiveResponse(request, NULL);
//DWORD size = 0;
//WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, WINHTTP_HEADER_NAME_BY_INDEX, NULL, &size, WINHTTP_NO_HEADER_INDEX);
//this->headers.resize(size);
//WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, WINHTTP_HEADER_NAME_BY_INDEX, this->headers.data(), &size, WINHTTP_NO_HEADER_INDEX);
std::string data;
DWORD availableSize, downloadedSize;
do
@ -54,18 +53,44 @@ std::wstring Escape(const std::wstring& text)
return escaped;
}
void Unescape(std::wstring& text)
namespace JSON
{
for (int i = 0; i < text.size(); ++i)
void Unescape(std::wstring& text)
{
if (text[i] == L'\\')
for (int i = 0; i < text.size(); ++i)
{
text[i] = 0;
if (text[i + 1] == L'r') text[i + 1] = 0; // for some reason \r gets displayed as a newline
if (text[i + 1] == L'n') text[i + 1] = L'\n';
if (text[i + 1] == L't') text[i + 1] = L'\t';
if (text[i + 1] == L'\\') ++i;
if (text[i] == L'\\')
{
text[i] = 0;
if (text[i + 1] == L'r') text[i + 1] = 0; // for some reason \r gets displayed as a newline
if (text[i + 1] == L'n') text[i + 1] = L'\n';
if (text[i + 1] == L't') text[i + 1] = L'\t';
if (text[i + 1] == L'\\') ++i;
}
}
text.erase(std::remove(text.begin(), text.end(), 0), text.end());
}
// Converts text to a narrow string with WideStringToString and makes it safe to place
// inside a JSON string literal: newline, tab, carriage return, backslash and double quote
// become two-character backslash escapes; any other byte below 0x20 is dropped.
std::string Escape(const std::wstring& text)
{
	const std::string narrow = WideStringToString(text);
	std::string escaped;
	escaped.reserve(narrow.size());
	for (const char ch : narrow)
	{
		switch (ch)
		{
		case '\n': escaped += "\\n"; break;
		case '\t': escaped += "\\t"; break;
		case '\r': escaped += "\\r"; break;
		case '\\': escaped += "\\\\"; break;
		case '"': escaped += "\\\""; break;
		default:
			// Remaining control characters have no escape here and are discarded.
			if (static_cast<unsigned char>(ch) >= 0x20) escaped += ch;
			break;
		}
	}
	return escaped;
}
text.erase(std::remove(text.begin(), text.end(), 0), text.end());
}

View File

@ -12,13 +12,12 @@ struct HttpRequest
const wchar_t* serverName,
const wchar_t* action,
const wchar_t* objectName,
std::string body = "",
const wchar_t* headers = NULL,
const wchar_t* referrer = NULL,
DWORD requestFlags = WINHTTP_FLAG_SECURE | WINHTTP_FLAG_ESCAPE_DISABLE,
const wchar_t* httpVersion = NULL,
const wchar_t* referrer = NULL,
const wchar_t** acceptTypes = NULL,
const wchar_t* headers = NULL,
void* body = NULL,
DWORD bodyLength = 0
const wchar_t** acceptTypes = NULL
);
operator bool() { return errorCode == ERROR_SUCCESS; }
@ -30,4 +29,9 @@ struct HttpRequest
};
std::wstring Escape(const std::wstring& text);
void Unescape(std::wstring& text);
// Helpers for the JSON string escaping used in translation requests and responses.
namespace JSON
{
// Decodes backslash escapes in text in place: the backslash is removed, \n and \t become
// a real newline and tab, \r is removed entirely, and any other escaped character
// (including a second backslash) is kept as-is.
void Unescape(std::wstring& text);
// Returns text converted to a narrow string with newline, tab, carriage return, backslash
// and double quote backslash-escaped and all other characters below 0x20 removed.
std::string Escape(const std::wstring& text);
}

View File

@ -14,12 +14,15 @@ extern const char* RATE_LIMIT_SELECTED_THREAD;
extern const char* USE_TRANS_CACHE;
extern const char* RATE_LIMIT_TOKEN_COUNT;
extern const char* RATE_LIMIT_TOKEN_RESTORE_DELAY;
extern const char* MAX_SENTENCE_SIZE;
extern const char* API_KEY;
extern const wchar_t* TOO_MANY_TRANS_REQUESTS;
extern const char* TRANSLATION_PROVIDER;
extern const char* GET_API_KEY_FROM;
extern QStringList languages;
extern bool translateSelectedOnly, rateLimitAll, rateLimitSelected, useCache;
extern int tokenCount, tokenRestoreDelay;
extern int tokenCount, tokenRestoreDelay, maxSentenceSize;
std::pair<bool, std::wstring> Translate(const std::wstring& text, SentenceInfo sentenceInfo);
const char* LANGUAGE = u8"Language";
@ -27,7 +30,7 @@ const std::string TRANSLATION_CACHE_FILE = FormatString("%s Cache.txt", TRANSLAT
QFormLayout* display;
QSettings settings = openSettings();
Synchronized<std::wstring> translateTo = L"en";
Synchronized<std::wstring> translateTo = L"en", apiKey;
Synchronized<std::map<std::wstring, std::wstring>> translationCache;
int savedSize;
@ -77,6 +80,7 @@ public:
for (auto [value, label] : Array<int&, const char*>{
{ tokenCount, RATE_LIMIT_TOKEN_COUNT },
{ tokenRestoreDelay, RATE_LIMIT_TOKEN_RESTORE_DELAY },
{ maxSentenceSize, MAX_SENTENCE_SIZE },
})
{
value = settings.value(label, value).toInt();
@ -86,6 +90,15 @@ public:
display->addRow(label, spinBox);
connect(spinBox, qOverload<int>(&QSpinBox::valueChanged), [label, &value](int newValue) { settings.setValue(label, value = newValue); });
}
if (GET_API_KEY_FROM)
{
auto keyInput = new QLineEdit(settings.value(API_KEY).toString());
apiKey->assign(S(keyInput->text()));
QObject::connect(keyInput, &QLineEdit::textChanged, [](QString key) { settings.setValue(API_KEY, S(apiKey->assign(S(key)))); });
auto keyLabel = new QLabel(QString("<a href=\"%1\">%2</a>").arg(GET_API_KEY_FROM, API_KEY), this);
keyLabel->setOpenExternalLinks(true);
display->addRow(keyLabel, keyInput);
}
setWindowTitle(TRANSLATION_PROVIDER);
QMetaObject::invokeMethod(this, &QWidget::show, Qt::QueuedConnection);
@ -115,7 +128,7 @@ private:
bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
{
if (sentenceInfo["text number"] == 0) return false;
if (sentenceInfo["text number"] == 0 || sentence.size() > maxSentenceSize) return false;
static class
{
@ -146,11 +159,9 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
if (cache) translationCache->try_emplace(sentence, translation);
if (cache && translationCache->size() > savedSize + 50) SaveCache();
Unescape(translation);
JSON::Unescape(translation);
sentence += L"\n" + translation;
return true;
}
TEST(
assert(Translate(L"こんにちは").second.find(L"ello") != std::wstring::npos)
);
TEST(assert(Translate(L"こんにちは").second.find(L"ello") != std::wstring::npos));

View File

@ -78,6 +78,11 @@ public:
// Locks m and returns a Locker built from that lock and contents.
Locker Acquire() { return { std::unique_lock(m), contents }; }
// Shorthand for Acquire(), so callers can write obj->member(); presumably the returned
// Locker forwards -> to contents while the lock is held — confirm against Locker.
Locker operator->() { return Acquire(); }
// Returns a copy of contents, made while the lock obtained from Acquire() is held.
T Copy()
{
	auto locked = Acquire();
	return locked.contents;
}
private:
T contents;
M m;